diff --git a/src/main.rs b/src/main.rs index 5af7166..848dd69 100644 --- a/src/main.rs +++ b/src/main.rs @@ -183,7 +183,7 @@ async fn process_single_thread( #[instrument(skip(db, reqwest))] /// Downloads and crawls and stores a webpage. /// It is acceptable to clone `db`, `reqwest`, and `s3` because they all use `Arc`s internally. - Noted by Oliver -async fn process(mut site: Website, db: Surreal, reqwest: reqwest::Client) { +async fn process(site: Website, db: Surreal, reqwest: reqwest::Client) { // METRICS debug!(url = &site.site.as_str(), "Process: {}", &site.site); BEING_PROCESSED.add(1, &[]); @@ -251,9 +251,16 @@ async fn process(mut site: Website, db: Surreal, reqwest: reqwest::Clien } } + let update_in_db = async |mut site: Website| { + // update self in db + site.crawled = true; + site.status_code = code.as_u16(); + Website::store_all(vec![site.clone()], &db).await; + }; if skip_download { trace!("Skipping download..."); + update_in_db(site).await; } else { // make sure that the file is good to go if let Some(file) = filesystem::init(&tmp_path).await { @@ -326,11 +333,10 @@ async fn process(mut site: Website, db: Surreal, reqwest: reqwest::Clien } // update self in db - site.crawled = true; - site.status_code = code.as_u16(); - Website::store_all(vec![site.clone()], &db).await; + update_in_db(site).await; } } + } else { error!(url = site.site.as_str(), "Failed to get: {}", &site.site); }