use reqwest client for epic speedup
This commit is contained in:
parent
ec66c4e765
commit
399510c599
14
src/main.rs
14
src/main.rs
@ -27,12 +27,17 @@ async fn main() {
|
|||||||
|
|
||||||
let db = connect().await.expect("Failed to connect to db, aborting.");
|
let db = connect().await.expect("Failed to connect to db, aborting.");
|
||||||
|
|
||||||
|
let client = reqwest::Client::builder()
|
||||||
|
// .use_rustls_tls()
|
||||||
|
.build()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
// Kick off the whole machine - This Website object doesn't matter, it's just to allow for
|
// Kick off the whole machine - This Website object doesn't matter, it's just to allow for
|
||||||
// get() to work.
|
// get() to work.
|
||||||
let span = trace_span!("Pre-Loop");
|
let span = trace_span!("Pre-Loop");
|
||||||
let pre_loop_span = span.enter();
|
let pre_loop_span = span.enter();
|
||||||
let mut site = Website::new(&url, false);
|
let mut site = Website::new(&url, false);
|
||||||
let dom = get(&mut site, &db).await.expect("Inital page returned None.");
|
let dom = get(&mut site, &db, &client).await.expect("Inital page returned None.");
|
||||||
crawl_wrapper(&dom, &db, &site, &mut crawled).await;
|
crawl_wrapper(&dom, &db, &site, &mut crawled).await;
|
||||||
drop(pre_loop_span);
|
drop(pre_loop_span);
|
||||||
|
|
||||||
@ -50,7 +55,7 @@ async fn main() {
|
|||||||
let _ = span.enter();
|
let _ = span.enter();
|
||||||
|
|
||||||
for mut site in uncrawled {
|
for mut site in uncrawled {
|
||||||
if let Some(dom) = get(&mut site, &db).await {
|
if let Some(dom) = get(&mut site, &db, &client).await {
|
||||||
crawl_wrapper(&dom, &db, &site, &mut crawled).await;
|
crawl_wrapper(&dom, &db, &site, &mut crawled).await;
|
||||||
let percent = format!("{:.2}%", (crawled as f32/budget as f32) * 100f32);
|
let percent = format!("{:.2}%", (crawled as f32/budget as f32) * 100f32);
|
||||||
info!("Crawled {crawled} out of {budget} pages. ({percent})");
|
info!("Crawled {crawled} out of {budget} pages. ({percent})");
|
||||||
@ -75,10 +80,11 @@ async fn crawl_wrapper(dom: &Rc<Node>, db: &Surreal<Client>, site: &Website, cou
|
|||||||
|
|
||||||
#[instrument(skip_all)]
|
#[instrument(skip_all)]
|
||||||
/// A quick helper function for downloading a url
|
/// A quick helper function for downloading a url
|
||||||
async fn get(site: &mut Website, db: &Surreal<Client>) -> Option<Rc<Node>> {
|
async fn get(site: &mut Website, db: &Surreal<Client>, getter: &reqwest::Client) -> Option<Rc<Node>> {
|
||||||
trace!("Get: {}", site.to_string());
|
trace!("Get: {}", site.to_string());
|
||||||
let timer = Timer::start("Got page");
|
let timer = Timer::start("Got page");
|
||||||
if let Ok(response) = reqwest::get(site.to_string()).await {
|
|
||||||
|
if let Ok(response) = getter.get(site.to_string()).send().await {
|
||||||
drop(timer);
|
drop(timer);
|
||||||
|
|
||||||
let data = response.text().await.unwrap();
|
let data = response.text().await.unwrap();
|
||||||
|
Loading…
Reference in New Issue
Block a user