From 298ad39a790a9648fb7bf04f6f92a0c6769ed9b2 Mon Sep 17 00:00:00 2001 From: Oliver Atkinson Date: Thu, 12 Dec 2024 14:59:54 -0700 Subject: [PATCH] rename --- src/main.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main.rs b/src/main.rs index 8385a79..6db0563 100644 --- a/src/main.rs +++ b/src/main.rs @@ -50,9 +50,9 @@ async fn main() { // Would probably take these in as parameters from a cli let starting_url = "https://en.wikipedia.org/"; - // When getting uncrawled pages, name must be LIKE this variable. "" will effectively get ignored. - let crawl_like = "wikipedia"; - let budget = 5; + // When getting uncrawled pages, name must contain this variable. "" will effectively get ignored. + let crawl_filter = "https://en.wikipedia.org/"; + let budget = 50; let mut crawled = 0; let s3 = S3::connect(&config).await.expect("Failed to connect to minio, aborting."); @@ -78,7 +78,7 @@ async fn main() { while crawled < budget { let get_num = if budget - crawled < 100 { budget - crawled } else { 100 }; - let uncrawled = get_uncrawled_links(&db, get_num, crawl_like.to_string()).await; + let uncrawled = get_uncrawled_links(&db, get_num, crawl_filter.to_string()).await; if uncrawled.len() == 0 { info!("Had more budget but finished crawling everything."); return;