From bd0b9462450fc78f791225565b1035a7458722b9 Mon Sep 17 00:00:00 2001 From: Rushmore75 Date: Tue, 18 Mar 2025 15:02:32 -0600 Subject: [PATCH] fixed tracing --- Cargo.toml | 2 +- src/db.rs | 13 ++++++++++++- src/main.rs | 21 +++++++++++++++++---- src/parser.rs | 2 -- 4 files changed, 30 insertions(+), 8 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2dcb455..b04036c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ base64 = "0.22.1" html5ever = "0.29" # minio = "0.1.0" minio = {git="https://github.com/minio/minio-rs.git", rev = "c28f576"} -reqwest = { version = "0.12", features = ["gzip"] } +reqwest = { version = "0.12", features = ["gzip", "default", "rustls-tls"] } serde = { version = "1.0", features = ["derive"] } surrealdb = "2.2" tokio = { version="1.41.0", features = ["full"] } diff --git a/src/db.rs b/src/db.rs index e8c19f4..02602dc 100644 --- a/src/db.rs +++ b/src/db.rs @@ -1,3 +1,4 @@ +use std::fmt::Debug; use serde::{Deserialize, Serialize}; use surrealdb::{ engine::remote::ws::{Client, Ws}, error::Db, opt::auth::Root, sql::Thing, Response, Surreal @@ -7,7 +8,7 @@ use url::Url; use crate::{Config, Timer}; -#[derive(Debug, Serialize, Deserialize, Clone)] +#[derive(Serialize, Deserialize, Clone)] pub struct Website { /// The url that this data is found at pub site: Url, @@ -17,6 +18,14 @@ pub struct Website { id: Option, } +// manual impl to make tracing look nicer +impl Debug for Website { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let site = (self.site.domain().unwrap_or("n/a")).to_string() + self.site.path(); + f.debug_struct("Website").field("site", &site).finish() + } +} + impl Website { /// Creates a blank site (assumes that url param is site's root) pub fn new(url: &str, crawled: bool) -> Self { @@ -80,6 +89,8 @@ impl Website { #[instrument(skip_all)] pub async fn store(&self, db: &Surreal) -> Option { + let t = Timer::start("Stored page"); + let _ = t; // check if it's been gone thru before let mut response = db .query("SELECT * FROM ONLY website WHERE site = $site LIMIT 1") diff --git a/src/main.rs b/src/main.rs index a87301d..a60f093 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,7 +9,7 @@ use db::{connect, Website}; use s3::S3; use surrealdb::{engine::remote::ws::Client, Surreal}; use tokio::task::JoinSet; -use tracing::{debug, info, instrument, trace, trace_span}; +use tracing::{debug, info, instrument, trace, trace_span, warn}; use tracing_subscriber::{fmt::time::LocalTime, EnvFilter}; mod db; @@ -36,6 +36,7 @@ async fn main() { tracing_subscriber::fmt() .with_env_filter(EnvFilter::from_default_env()) .with_line_number(true) + .with_thread_ids(true) .with_file(true) .with_timer(LocalTime::rfc_3339()) .init(); @@ -123,14 +124,20 @@ async fn main() { drop(total_runtime); } -#[instrument(skip_all)] +#[instrument(skip (db, s3, reqwest))] /// Downloads and crawls and stores a webpage. /// It is acceptable to clone `db`, `reqwest`, and `s3` because they all use `Arc`s internally. - Noted by Oliver async fn get(mut site: Website, db: Surreal, reqwest: reqwest::Client, s3: S3) { trace!("Get: {}", site.to_string()); + + let timer = Timer::start("Built request"); + let request_builder = reqwest.get(site.to_string()); + timer.stop(); + let timer = Timer::start("Got page"); - if let Ok(response) = reqwest.get(site.to_string()).send().await { + if let Ok(response) = request_builder.send().await { timer.stop(); + debug!("Getting body..."); // Get body let data = response.text().await.expect("Failed to read http response's body!"); @@ -182,7 +189,13 @@ impl<'a> Timer<'a> { pub fn stop(&self) -> f64 { let dif = self.start.elapsed().as_micros(); let ms = dif as f64 / 1000.; - trace!("{}", format!("{} in {:.3}ms", self.msg, ms)); + + if ms > 200. { + warn!("{}", format!("{} in {:.3}ms", self.msg, ms)); + } else { + trace!("{}", format!("{} in {:.3}ms", self.msg, ms)); + } + ms } } diff --git a/src/parser.rs b/src/parser.rs index 3357059..68786b0 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -93,7 +93,6 @@ pub async fn parse(db: &Surreal, site: &mut Website, data: &str) { } { - let t = Timer::start("Stored pages"); let mut links_to = Vec::new(); // this is a 2d vec accidentally @@ -108,6 +107,5 @@ pub async fn parse(db: &Surreal, site: &mut Website, data: &str) { } site.links_to(links_to, db).await; - drop(t); } }