Compare commits

..

2 Commits

Author SHA1 Message Date
399510c599 use reqwest client for epic speedup 2024-11-10 20:37:00 -07:00
ec66c4e765 remove unused import 2024-11-10 20:36:39 -07:00
2 changed files with 11 additions and 5 deletions

View File

@@ -5,7 +5,7 @@ use surrealdb::{
sql::Thing,
Response, Surreal,
};
-use tracing::{debug, error, instrument, trace, warn};
+use tracing::{error, instrument, trace, warn};
use url::Url;
use crate::Timer;

View File

@@ -27,12 +27,17 @@ async fn main() {
let db = connect().await.expect("Failed to connect to db, aborting.");
+let client = reqwest::Client::builder()
+// .use_rustls_tls()
+.build()
+.unwrap();
// Kick off the whole machine - This Website object doesn't matter, it's just to allow for
// get() to work.
let span = trace_span!("Pre-Loop");
let pre_loop_span = span.enter();
let mut site = Website::new(&url, false);
-let dom = get(&mut site, &db).await.expect("Inital page returned None.");
+let dom = get(&mut site, &db, &client).await.expect("Inital page returned None.");
crawl_wrapper(&dom, &db, &site, &mut crawled).await;
drop(pre_loop_span);
@@ -50,7 +55,7 @@ async fn main() {
let _ = span.enter();
for mut site in uncrawled {
-if let Some(dom) = get(&mut site, &db).await {
+if let Some(dom) = get(&mut site, &db, &client).await {
crawl_wrapper(&dom, &db, &site, &mut crawled).await;
let percent = format!("{:.2}%", (crawled as f32/budget as f32) * 100f32);
info!("Crawled {crawled} out of {budget} pages. ({percent})");
@@ -75,10 +80,11 @@ async fn crawl_wrapper(dom: &Rc<Node>, db: &Surreal<Client>, site: &Website, cou
#[instrument(skip_all)]
/// A quick helper function for downloading a url
-async fn get(site: &mut Website, db: &Surreal<Client>) -> Option<Rc<Node>> {
+async fn get(site: &mut Website, db: &Surreal<Client>, getter: &reqwest::Client) -> Option<Rc<Node>> {
trace!("Get: {}", site.to_string());
let timer = Timer::start("Got page");
-if let Ok(response) = reqwest::get(site.to_string()).await {
+if let Ok(response) = getter.get(site.to_string()).send().await {
drop(timer);
let data = response.text().await.unwrap();