This commit is contained in:
Oliver Atkinson 2024-12-12 14:59:54 -07:00
parent 215056e493
commit 298ad39a79

View File

@ -50,9 +50,9 @@ async fn main() {
// TODO: these should probably be taken as parameters from a CLI.
let starting_url = "https://en.wikipedia.org/";
// When getting uncrawled pages, name must be LIKE this variable. "" will effectively get ignored.
let crawl_like = "wikipedia";
let budget = 5;
// When getting uncrawled pages, the page name must contain the value of this variable. An empty string ("") is effectively ignored.
let crawl_filter = "https://en.wikipedia.org/";
let budget = 50;
let mut crawled = 0;
let s3 = S3::connect(&config).await.expect("Failed to connect to minio, aborting.");
@ -78,7 +78,7 @@ async fn main() {
while crawled < budget {
let get_num = if budget - crawled < 100 { budget - crawled } else { 100 };
let uncrawled = get_uncrawled_links(&db, get_num, crawl_like.to_string()).await;
let uncrawled = get_uncrawled_links(&db, get_num, crawl_filter.to_string()).await;
if uncrawled.len() == 0 {
info!("Had more budget but finished crawling everything.");
return;