rename
This commit is contained in:
parent
215056e493
commit
298ad39a79
@ -50,9 +50,9 @@ async fn main() {
|
|||||||
|
|
||||||
// Would probably take these in as parameters from a cli
|
// Would probably take these in as parameters from a cli
|
||||||
let starting_url = "https://en.wikipedia.org/";
|
let starting_url = "https://en.wikipedia.org/";
|
||||||
// When getting uncrawled pages, name must be LIKE this variable. "" will effectively get ignored.
|
// When getting uncrawled pages, name must contain this variable. "" will effectively get ignored.
|
||||||
let crawl_like = "wikipedia";
|
let crawl_filter = "https://en.wikipedia.org/";
|
||||||
let budget = 5;
|
let budget = 50;
|
||||||
let mut crawled = 0;
|
let mut crawled = 0;
|
||||||
|
|
||||||
let s3 = S3::connect(&config).await.expect("Failed to connect to minio, aborting.");
|
let s3 = S3::connect(&config).await.expect("Failed to connect to minio, aborting.");
|
||||||
@ -78,7 +78,7 @@ async fn main() {
|
|||||||
while crawled < budget {
|
while crawled < budget {
|
||||||
let get_num = if budget - crawled < 100 { budget - crawled } else { 100 };
|
let get_num = if budget - crawled < 100 { budget - crawled } else { 100 };
|
||||||
|
|
||||||
let uncrawled = get_uncrawled_links(&db, get_num, crawl_like.to_string()).await;
|
let uncrawled = get_uncrawled_links(&db, get_num, crawl_filter.to_string()).await;
|
||||||
if uncrawled.len() == 0 {
|
if uncrawled.len() == 0 {
|
||||||
info!("Had more budget but finished crawling everything.");
|
info!("Had more budget but finished crawling everything.");
|
||||||
return;
|
return;
|
||||||
|
Loading…
Reference in New Issue
Block a user