Use a shared, reused reqwest client for epic speedup

This commit is contained in:
oliver 2024-11-10 20:37:00 -07:00
parent ec66c4e765
commit 399510c599

View File

@ -27,12 +27,17 @@ async fn main() {
let db = connect().await.expect("Failed to connect to db, aborting."); let db = connect().await.expect("Failed to connect to db, aborting.");
let client = reqwest::Client::builder()
// .use_rustls_tls()
.build()
.unwrap();
// Kick off the whole machine - This Website object doesn't matter, it's just to allow for // Kick off the whole machine - This Website object doesn't matter, it's just to allow for
// get() to work. // get() to work.
let span = trace_span!("Pre-Loop"); let span = trace_span!("Pre-Loop");
let pre_loop_span = span.enter(); let pre_loop_span = span.enter();
let mut site = Website::new(&url, false); let mut site = Website::new(&url, false);
let dom = get(&mut site, &db).await.expect("Inital page returned None."); let dom = get(&mut site, &db, &client).await.expect("Inital page returned None.");
crawl_wrapper(&dom, &db, &site, &mut crawled).await; crawl_wrapper(&dom, &db, &site, &mut crawled).await;
drop(pre_loop_span); drop(pre_loop_span);
@ -50,7 +55,7 @@ async fn main() {
let _ = span.enter(); let _ = span.enter();
for mut site in uncrawled { for mut site in uncrawled {
if let Some(dom) = get(&mut site, &db).await { if let Some(dom) = get(&mut site, &db, &client).await {
crawl_wrapper(&dom, &db, &site, &mut crawled).await; crawl_wrapper(&dom, &db, &site, &mut crawled).await;
let percent = format!("{:.2}%", (crawled as f32/budget as f32) * 100f32); let percent = format!("{:.2}%", (crawled as f32/budget as f32) * 100f32);
info!("Crawled {crawled} out of {budget} pages. ({percent})"); info!("Crawled {crawled} out of {budget} pages. ({percent})");
@ -75,10 +80,11 @@ async fn crawl_wrapper(dom: &Rc<Node>, db: &Surreal<Client>, site: &Website, cou
#[instrument(skip_all)] #[instrument(skip_all)]
/// A quick helper function for downloading a url /// A quick helper function for downloading a url
async fn get(site: &mut Website, db: &Surreal<Client>) -> Option<Rc<Node>> { async fn get(site: &mut Website, db: &Surreal<Client>, getter: &reqwest::Client) -> Option<Rc<Node>> {
trace!("Get: {}", site.to_string()); trace!("Get: {}", site.to_string());
let timer = Timer::start("Got page"); let timer = Timer::start("Got page");
if let Ok(response) = reqwest::get(site.to_string()).await {
if let Ok(response) = getter.get(site.to_string()).send().await {
drop(timer); drop(timer);
let data = response.text().await.unwrap(); let data = response.text().await.unwrap();