fixed tracing
This commit is contained in:
		| @@ -8,7 +8,7 @@ base64 = "0.22.1" | |||||||
| html5ever = "0.29" | html5ever = "0.29" | ||||||
| # minio = "0.1.0" | # minio = "0.1.0" | ||||||
| minio = {git="https://github.com/minio/minio-rs.git", rev = "c28f576"} | minio = {git="https://github.com/minio/minio-rs.git", rev = "c28f576"} | ||||||
| reqwest = { version = "0.12", features = ["gzip"] } | reqwest = { version = "0.12", features = ["gzip", "default", "rustls-tls"] } | ||||||
| serde = { version = "1.0", features = ["derive"] } | serde = { version = "1.0", features = ["derive"] } | ||||||
| surrealdb = "2.2" | surrealdb = "2.2" | ||||||
| tokio = { version="1.41.0", features = ["full"] } | tokio = { version="1.41.0", features = ["full"] } | ||||||
|   | |||||||
							
								
								
									
										13
									
								
								src/db.rs
									
									
									
									
									
								
							
							
						
						
									
										13
									
								
								src/db.rs
									
									
									
									
									
								
							| @@ -1,3 +1,4 @@ | |||||||
|  | use std::fmt::Debug; | ||||||
| use serde::{Deserialize, Serialize}; | use serde::{Deserialize, Serialize}; | ||||||
| use surrealdb::{ | use surrealdb::{ | ||||||
|     engine::remote::ws::{Client, Ws}, error::Db, opt::auth::Root, sql::Thing, Response, Surreal |     engine::remote::ws::{Client, Ws}, error::Db, opt::auth::Root, sql::Thing, Response, Surreal | ||||||
| @@ -7,7 +8,7 @@ use url::Url; | |||||||
|  |  | ||||||
| use crate::{Config, Timer}; | use crate::{Config, Timer}; | ||||||
|  |  | ||||||
| #[derive(Debug, Serialize, Deserialize, Clone)] | #[derive(Serialize, Deserialize, Clone)] | ||||||
| pub struct Website { | pub struct Website { | ||||||
|     /// The url that this data is found at |     /// The url that this data is found at | ||||||
|     pub site: Url, |     pub site: Url, | ||||||
| @@ -17,6 +18,14 @@ pub struct Website { | |||||||
|     id: Option<Thing>, |     id: Option<Thing>, | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // manual impl to make tracing look nicer | ||||||
|  | impl Debug for Website { | ||||||
|  |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|  |         let site = (self.site.domain().unwrap_or("n/a")).to_string() + self.site.path(); | ||||||
|  |         f.debug_struct("Website").field("site", &site).finish() | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| impl Website { | impl Website { | ||||||
|     /// Creates a blank site (assumes that url param is site's root) |     /// Creates a blank site (assumes that url param is site's root) | ||||||
|     pub fn new(url: &str, crawled: bool) -> Self { |     pub fn new(url: &str, crawled: bool) -> Self { | ||||||
| @@ -80,6 +89,8 @@ impl Website { | |||||||
|  |  | ||||||
|     #[instrument(skip_all)] |     #[instrument(skip_all)] | ||||||
|     pub async fn store(&self, db: &Surreal<Client>) -> Option<Thing> { |     pub async fn store(&self, db: &Surreal<Client>) -> Option<Thing> { | ||||||
|  |         let t = Timer::start("Stored page"); | ||||||
|  |         let _ = t; | ||||||
|         // check if it's been gone thru before |         // check if it's been gone thru before | ||||||
|         let mut response = db |         let mut response = db | ||||||
|             .query("SELECT * FROM ONLY website WHERE site = $site LIMIT 1") |             .query("SELECT * FROM ONLY website WHERE site = $site LIMIT 1") | ||||||
|   | |||||||
							
								
								
									
										21
									
								
								src/main.rs
									
									
									
									
									
								
							
							
						
						
									
										21
									
								
								src/main.rs
									
									
									
									
									
								
							| @@ -9,7 +9,7 @@ use db::{connect, Website}; | |||||||
| use s3::S3; | use s3::S3; | ||||||
| use surrealdb::{engine::remote::ws::Client, Surreal}; | use surrealdb::{engine::remote::ws::Client, Surreal}; | ||||||
| use tokio::task::JoinSet; | use tokio::task::JoinSet; | ||||||
| use tracing::{debug, info, instrument, trace, trace_span}; | use tracing::{debug, info, instrument, trace, trace_span, warn}; | ||||||
| use tracing_subscriber::{fmt::time::LocalTime, EnvFilter}; | use tracing_subscriber::{fmt::time::LocalTime, EnvFilter}; | ||||||
|  |  | ||||||
| mod db; | mod db; | ||||||
| @@ -36,6 +36,7 @@ async fn main() { | |||||||
|     tracing_subscriber::fmt() |     tracing_subscriber::fmt() | ||||||
|         .with_env_filter(EnvFilter::from_default_env()) |         .with_env_filter(EnvFilter::from_default_env()) | ||||||
|         .with_line_number(true) |         .with_line_number(true) | ||||||
|  |         .with_thread_ids(true) | ||||||
|         .with_file(true) |         .with_file(true) | ||||||
|         .with_timer(LocalTime::rfc_3339()) |         .with_timer(LocalTime::rfc_3339()) | ||||||
|         .init(); |         .init(); | ||||||
| @@ -123,14 +124,20 @@ async fn main() { | |||||||
|     drop(total_runtime); |     drop(total_runtime); | ||||||
| } | } | ||||||
|  |  | ||||||
| #[instrument(skip_all)] | #[instrument(skip (db, s3, reqwest))] | ||||||
| /// Downloads and crawls and stores a webpage. | /// Downloads and crawls and stores a webpage. | ||||||
| /// It is acceptable to clone `db`, `reqwest`, and `s3` because they all use `Arc`s internally. - Noted by Oliver  | /// It is acceptable to clone `db`, `reqwest`, and `s3` because they all use `Arc`s internally. - Noted by Oliver  | ||||||
| async fn get(mut site: Website, db: Surreal<Client>, reqwest: reqwest::Client, s3: S3) { | async fn get(mut site: Website, db: Surreal<Client>, reqwest: reqwest::Client, s3: S3) { | ||||||
|     trace!("Get: {}", site.to_string()); |     trace!("Get: {}", site.to_string()); | ||||||
|  |  | ||||||
|  |     let timer = Timer::start("Built request"); | ||||||
|  |     let request_builder = reqwest.get(site.to_string()); | ||||||
|  |     timer.stop(); | ||||||
|  |  | ||||||
|     let timer = Timer::start("Got page"); |     let timer = Timer::start("Got page"); | ||||||
|     if let Ok(response) = reqwest.get(site.to_string()).send().await { |     if let Ok(response) = request_builder.send().await { | ||||||
|         timer.stop(); |         timer.stop(); | ||||||
|  |         debug!("Getting body..."); | ||||||
|  |  | ||||||
|         // Get body |         // Get body | ||||||
|         let data = response.text().await.expect("Failed to read http response's body!"); |         let data = response.text().await.expect("Failed to read http response's body!"); | ||||||
| @@ -182,7 +189,13 @@ impl<'a> Timer<'a> { | |||||||
|     pub fn stop(&self) -> f64 { |     pub fn stop(&self) -> f64 { | ||||||
|         let dif = self.start.elapsed().as_micros(); |         let dif = self.start.elapsed().as_micros(); | ||||||
|         let ms = dif as f64 / 1000.; |         let ms = dif as f64 / 1000.; | ||||||
|         trace!("{}", format!("{} in {:.3}ms", self.msg, ms)); |  | ||||||
|  |         if ms > 200. { | ||||||
|  |             warn!("{}", format!("{} in {:.3}ms", self.msg, ms)); | ||||||
|  |         } else { | ||||||
|  |             trace!("{}", format!("{} in {:.3}ms", self.msg, ms)); | ||||||
|  |         } | ||||||
|  |  | ||||||
|         ms |         ms | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -93,7 +93,6 @@ pub async fn parse(db: &Surreal<Client>, site: &mut Website, data: &str) { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     { |     { | ||||||
|         let t = Timer::start("Stored pages"); |  | ||||||
|         let mut links_to = Vec::new(); |         let mut links_to = Vec::new(); | ||||||
|  |  | ||||||
|         // this is a 2d vec accidentally |         // this is a 2d vec accidentally | ||||||
| @@ -108,6 +107,5 @@ pub async fn parse(db: &Surreal<Client>, site: &mut Website, data: &str) { | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         site.links_to(links_to, db).await; |         site.links_to(links_to, db).await; | ||||||
|         drop(t); |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user