From 1e59ebd5c4465b3e74fd185f3fcb7040c56c2d43 Mon Sep 17 00:00:00 2001 From: Oliver Date: Thu, 9 Oct 2025 22:13:06 -0600 Subject: [PATCH] even when not downloading, update the database --- src/main.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/main.rs b/src/main.rs index 5af7166..848dd69 100644 --- a/src/main.rs +++ b/src/main.rs @@ -183,7 +183,7 @@ async fn process_single_thread( #[instrument(skip(db, reqwest))] /// Downloads and crawls and stores a webpage. /// It is acceptable to clone `db`, `reqwest`, and `s3` because they all use `Arc`s internally. - Noted by Oliver -async fn process(mut site: Website, db: Surreal, reqwest: reqwest::Client) { +async fn process(site: Website, db: Surreal, reqwest: reqwest::Client) { // METRICS debug!(url = &site.site.as_str(), "Process: {}", &site.site); BEING_PROCESSED.add(1, &[]); @@ -251,9 +251,16 @@ async fn process(mut site: Website, db: Surreal, reqwest: reqwest::Clien } } + let update_in_db = async |mut site: Website| { + // update self in db + site.crawled = true; + site.status_code = code.as_u16(); + Website::store_all(vec![site.clone()], &db).await; + }; if skip_download { trace!("Skipping download..."); + update_in_db(site).await; } else { // make sure that the file is good to go if let Some(file) = filesystem::init(&tmp_path).await { @@ -326,11 +333,10 @@ async fn process(mut site: Website, db: Surreal, reqwest: reqwest::Clien } // update self in db - site.crawled = true; - site.status_code = code.as_u16(); - Website::store_all(vec![site.clone()], &db).await; + update_in_db(site).await; } } + } else { error!(url = site.site.as_str(), "Failed to get: {}", &site.site); }