use reqwest client for epic speedup
This commit is contained in:
		
							
								
								
									
										14
									
								
								src/main.rs
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								src/main.rs
									
									
									
									
									
								
							@@ -27,12 +27,17 @@ async fn main() {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    let db = connect().await.expect("Failed to connect to db, aborting.");
 | 
					    let db = connect().await.expect("Failed to connect to db, aborting.");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let client = reqwest::Client::builder()
 | 
				
			||||||
 | 
					        // .use_rustls_tls()
 | 
				
			||||||
 | 
					        .build()
 | 
				
			||||||
 | 
					        .unwrap();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Kick off the whole machine - This Website object doesn't matter, it's just to allow for
 | 
					    // Kick off the whole machine - This Website object doesn't matter, it's just to allow for
 | 
				
			||||||
    // get() to work.
 | 
					    // get() to work.
 | 
				
			||||||
    let span = trace_span!("Pre-Loop");
 | 
					    let span = trace_span!("Pre-Loop");
 | 
				
			||||||
    let pre_loop_span = span.enter();
 | 
					    let pre_loop_span = span.enter();
 | 
				
			||||||
    let mut site = Website::new(&url, false);
 | 
					    let mut site = Website::new(&url, false);
 | 
				
			||||||
    let dom = get(&mut site, &db).await.expect("Inital page returned None.");
 | 
					    let dom = get(&mut site, &db, &client).await.expect("Inital page returned None.");
 | 
				
			||||||
    crawl_wrapper(&dom, &db, &site, &mut crawled).await;
 | 
					    crawl_wrapper(&dom, &db, &site, &mut crawled).await;
 | 
				
			||||||
    drop(pre_loop_span);
 | 
					    drop(pre_loop_span);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -50,7 +55,7 @@ async fn main() {
 | 
				
			|||||||
        let _ = span.enter();
 | 
					        let _ = span.enter();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for mut site in uncrawled {
 | 
					        for mut site in uncrawled {
 | 
				
			||||||
            if let Some(dom) = get(&mut site, &db).await {
 | 
					            if let Some(dom) = get(&mut site, &db, &client).await {
 | 
				
			||||||
                crawl_wrapper(&dom, &db, &site, &mut crawled).await;
 | 
					                crawl_wrapper(&dom, &db, &site, &mut crawled).await;
 | 
				
			||||||
                let percent = format!("{:.2}%", (crawled as f32/budget as f32) * 100f32);
 | 
					                let percent = format!("{:.2}%", (crawled as f32/budget as f32) * 100f32);
 | 
				
			||||||
                info!("Crawled {crawled} out of {budget} pages. ({percent})");
 | 
					                info!("Crawled {crawled} out of {budget} pages. ({percent})");
 | 
				
			||||||
@@ -75,10 +80,11 @@ async fn crawl_wrapper(dom: &Rc<Node>, db: &Surreal<Client>, site: &Website, cou
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#[instrument(skip_all)]
 | 
					#[instrument(skip_all)]
 | 
				
			||||||
/// A quick helper function for downloading a url
 | 
					/// A quick helper function for downloading a url
 | 
				
			||||||
async fn get(site: &mut Website, db: &Surreal<Client>) -> Option<Rc<Node>> {
 | 
					async fn get(site: &mut Website, db: &Surreal<Client>, getter: &reqwest::Client) -> Option<Rc<Node>> {
 | 
				
			||||||
    trace!("Get: {}", site.to_string());
 | 
					    trace!("Get: {}", site.to_string());
 | 
				
			||||||
    let timer = Timer::start("Got page");
 | 
					    let timer = Timer::start("Got page");
 | 
				
			||||||
    if let Ok(response) = reqwest::get(site.to_string()).await {
 | 
					
 | 
				
			||||||
 | 
					    if let Ok(response) = getter.get(site.to_string()).send().await {
 | 
				
			||||||
        drop(timer);
 | 
					        drop(timer);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        let data = response.text().await.unwrap();
 | 
					        let data = response.text().await.unwrap();
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user