Update the database even when the download is skipped

This commit is contained in:
2025-10-09 22:13:06 -06:00
parent 52d5e101d0
commit 1e59ebd5c4

View File

@@ -183,7 +183,7 @@ async fn process_single_thread(
#[instrument(skip(db, reqwest))]
/// Downloads and crawls and stores a webpage.
/// It is acceptable to clone `db`, `reqwest`, and `s3` because they all use `Arc`s internally. - Noted by Oliver
async fn process(mut site: Website, db: Surreal<Client>, reqwest: reqwest::Client) {
async fn process(site: Website, db: Surreal<Client>, reqwest: reqwest::Client) {
// METRICS
debug!(url = &site.site.as_str(), "Process: {}", &site.site);
BEING_PROCESSED.add(1, &[]);
@@ -251,9 +251,16 @@ async fn process(mut site: Website, db: Surreal<Client>, reqwest: reqwest::Clien
}
}
let update_in_db = async |mut site: Website| {
// update self in db
site.crawled = true;
site.status_code = code.as_u16();
Website::store_all(vec![site.clone()], &db).await;
};
if skip_download {
trace!("Skipping download...");
update_in_db(site).await;
} else {
// make sure that the file is good to go
if let Some(file) = filesystem::init(&tmp_path).await {
@@ -326,11 +333,10 @@ async fn process(mut site: Website, db: Surreal<Client>, reqwest: reqwest::Clien
}
// update self in db
site.crawled = true;
site.status_code = code.as_u16();
Website::store_all(vec![site.clone()], &db).await;
update_in_db(site).await;
}
}
} else {
error!(url = site.site.as_str(), "Failed to get: {}", &site.site);
}