updating deps
This commit is contained in:
17
src/main.rs
17
src/main.rs
@@ -31,8 +31,8 @@ async fn main() {
|
||||
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(EnvFilter::from_default_env())
|
||||
.with_line_number(true)
|
||||
// .without_time()
|
||||
.with_line_number(false)
|
||||
.without_time()
|
||||
.init();
|
||||
debug!("Starting...");
|
||||
|
||||
@@ -49,7 +49,9 @@ async fn main() {
|
||||
};
|
||||
|
||||
// These would probably be taken in as parameters from a CLI.
|
||||
let starting_url = "https://oliveratkinson.net/";
|
||||
let starting_url = "https://en.wikipedia.org/";
|
||||
// When fetching uncrawled pages, the site name must be LIKE this value. An empty string ("") is effectively ignored.
|
||||
let crawl_like = "wikipedia";
|
||||
let budget = 5;
|
||||
let mut crawled = 0;
|
||||
|
||||
@@ -76,7 +78,7 @@ async fn main() {
|
||||
while crawled < budget {
|
||||
let get_num = if budget - crawled < 100 { budget - crawled } else { 100 };
|
||||
|
||||
let uncrawled = get_uncrawled_links(&db, get_num).await;
|
||||
let uncrawled = get_uncrawled_links(&db, get_num, crawl_like.to_string()).await;
|
||||
if uncrawled.len() == 0 {
|
||||
info!("Had more budget but finished crawling everything.");
|
||||
return;
|
||||
@@ -126,13 +128,16 @@ async fn get(
|
||||
}
|
||||
|
||||
/// Returns uncrawled links
|
||||
async fn get_uncrawled_links(db: &Surreal<Client>, mut count: usize) -> Vec<Website> {
|
||||
#[instrument(skip(db))]
|
||||
async fn get_uncrawled_links(db: &Surreal<Client>, mut count: usize, param: String) -> Vec<Website> {
|
||||
if count > 100 {
|
||||
count = 100
|
||||
}
|
||||
trace!("Getting uncrawled links");
|
||||
|
||||
let mut response = db
|
||||
.query("SELECT * FROM website WHERE crawled = false LIMIT $count")
|
||||
.query("SELECT * FROM website WHERE crawled = false AND site ~ type::string($format) LIMIT $count;")
|
||||
.bind(("format", param))
|
||||
.bind(("count", count))
|
||||
.await
|
||||
.expect("Hard-coded query failed..?");
|
||||
|
Reference in New Issue
Block a user