multithreading #2
@@ -50,9 +50,9 @@ async fn main() {
 | 
			
		||||
 | 
			
		||||
    // Would probably take these in as parameters from a cli
 | 
			
		||||
    let starting_url = "https://en.wikipedia.org/";
 | 
			
		||||
    // When getting uncrawled pages, name must be LIKE this variable. "" will effectively get ignored.
 | 
			
		||||
    let crawl_like = "wikipedia";
 | 
			
		||||
    let budget = 5;
 | 
			
		||||
    // When getting uncrawled pages, name must contain this variable. "" will effectively get ignored.
 | 
			
		||||
    let crawl_filter = "https://en.wikipedia.org/";
 | 
			
		||||
    let budget = 50;
 | 
			
		||||
    let mut crawled = 0;
 | 
			
		||||
 | 
			
		||||
    let s3 = S3::connect(&config).await.expect("Failed to connect to minio, aborting.");
 | 
			
		||||
@@ -78,7 +78,7 @@ async fn main() {
 | 
			
		||||
    while crawled < budget {
 | 
			
		||||
        let get_num = if budget - crawled < 100 { budget - crawled } else { 100 };
 | 
			
		||||
 | 
			
		||||
        let uncrawled = get_uncrawled_links(&db, get_num, crawl_like.to_string()).await;
 | 
			
		||||
        let uncrawled = get_uncrawled_links(&db, get_num, crawl_filter.to_string()).await;
 | 
			
		||||
        if uncrawled.len() == 0 {
 | 
			
		||||
            info!("Had more budget but finished crawling everything.");
 | 
			
		||||
            return;
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user