fixed tracing
This commit is contained in:
		@@ -8,7 +8,7 @@ base64 = "0.22.1"
 | 
				
			|||||||
html5ever = "0.29"
 | 
					html5ever = "0.29"
 | 
				
			||||||
# minio = "0.1.0"
 | 
					# minio = "0.1.0"
 | 
				
			||||||
minio = {git="https://github.com/minio/minio-rs.git", rev = "c28f576"}
 | 
					minio = {git="https://github.com/minio/minio-rs.git", rev = "c28f576"}
 | 
				
			||||||
reqwest = { version = "0.12", features = ["gzip"] }
 | 
					reqwest = { version = "0.12", features = ["gzip", "default", "rustls-tls"] }
 | 
				
			||||||
serde = { version = "1.0", features = ["derive"] }
 | 
					serde = { version = "1.0", features = ["derive"] }
 | 
				
			||||||
surrealdb = "2.2"
 | 
					surrealdb = "2.2"
 | 
				
			||||||
tokio = { version="1.41.0", features = ["full"] }
 | 
					tokio = { version="1.41.0", features = ["full"] }
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										13
									
								
								src/db.rs
									
									
									
									
									
								
							
							
						
						
									
										13
									
								
								src/db.rs
									
									
									
									
									
								
							@@ -1,3 +1,4 @@
 | 
				
			|||||||
 | 
					use std::fmt::Debug;
 | 
				
			||||||
use serde::{Deserialize, Serialize};
 | 
					use serde::{Deserialize, Serialize};
 | 
				
			||||||
use surrealdb::{
 | 
					use surrealdb::{
 | 
				
			||||||
    engine::remote::ws::{Client, Ws}, error::Db, opt::auth::Root, sql::Thing, Response, Surreal
 | 
					    engine::remote::ws::{Client, Ws}, error::Db, opt::auth::Root, sql::Thing, Response, Surreal
 | 
				
			||||||
@@ -7,7 +8,7 @@ use url::Url;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
use crate::{Config, Timer};
 | 
					use crate::{Config, Timer};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[derive(Debug, Serialize, Deserialize, Clone)]
 | 
					#[derive(Serialize, Deserialize, Clone)]
 | 
				
			||||||
pub struct Website {
 | 
					pub struct Website {
 | 
				
			||||||
    /// The url that this data is found at
 | 
					    /// The url that this data is found at
 | 
				
			||||||
    pub site: Url,
 | 
					    pub site: Url,
 | 
				
			||||||
@@ -17,6 +18,14 @@ pub struct Website {
 | 
				
			|||||||
    id: Option<Thing>,
 | 
					    id: Option<Thing>,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// manual impl to make tracing look nicer
 | 
				
			||||||
 | 
					impl Debug for Website {
 | 
				
			||||||
 | 
					    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 | 
				
			||||||
 | 
					        let site = (self.site.domain().unwrap_or("n/a")).to_string() + self.site.path();
 | 
				
			||||||
 | 
					        f.debug_struct("Website").field("site", &site).finish()
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
impl Website {
 | 
					impl Website {
 | 
				
			||||||
    /// Creates a blank site (assumes that url param is site's root)
 | 
					    /// Creates a blank site (assumes that url param is site's root)
 | 
				
			||||||
    pub fn new(url: &str, crawled: bool) -> Self {
 | 
					    pub fn new(url: &str, crawled: bool) -> Self {
 | 
				
			||||||
@@ -80,6 +89,8 @@ impl Website {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    #[instrument(skip_all)]
 | 
					    #[instrument(skip_all)]
 | 
				
			||||||
    pub async fn store(&self, db: &Surreal<Client>) -> Option<Thing> {
 | 
					    pub async fn store(&self, db: &Surreal<Client>) -> Option<Thing> {
 | 
				
			||||||
 | 
					        let t = Timer::start("Stored page");
 | 
				
			||||||
 | 
					        let _ = t;
 | 
				
			||||||
        // check if it's been gone thru before
 | 
					        // check if it's been gone thru before
 | 
				
			||||||
        let mut response = db
 | 
					        let mut response = db
 | 
				
			||||||
            .query("SELECT * FROM ONLY website WHERE site = $site LIMIT 1")
 | 
					            .query("SELECT * FROM ONLY website WHERE site = $site LIMIT 1")
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										21
									
								
								src/main.rs
									
									
									
									
									
								
							
							
						
						
									
										21
									
								
								src/main.rs
									
									
									
									
									
								
							@@ -9,7 +9,7 @@ use db::{connect, Website};
 | 
				
			|||||||
use s3::S3;
 | 
					use s3::S3;
 | 
				
			||||||
use surrealdb::{engine::remote::ws::Client, Surreal};
 | 
					use surrealdb::{engine::remote::ws::Client, Surreal};
 | 
				
			||||||
use tokio::task::JoinSet;
 | 
					use tokio::task::JoinSet;
 | 
				
			||||||
use tracing::{debug, info, instrument, trace, trace_span};
 | 
					use tracing::{debug, info, instrument, trace, trace_span, warn};
 | 
				
			||||||
use tracing_subscriber::{fmt::time::LocalTime, EnvFilter};
 | 
					use tracing_subscriber::{fmt::time::LocalTime, EnvFilter};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
mod db;
 | 
					mod db;
 | 
				
			||||||
@@ -36,6 +36,7 @@ async fn main() {
 | 
				
			|||||||
    tracing_subscriber::fmt()
 | 
					    tracing_subscriber::fmt()
 | 
				
			||||||
        .with_env_filter(EnvFilter::from_default_env())
 | 
					        .with_env_filter(EnvFilter::from_default_env())
 | 
				
			||||||
        .with_line_number(true)
 | 
					        .with_line_number(true)
 | 
				
			||||||
 | 
					        .with_thread_ids(true)
 | 
				
			||||||
        .with_file(true)
 | 
					        .with_file(true)
 | 
				
			||||||
        .with_timer(LocalTime::rfc_3339())
 | 
					        .with_timer(LocalTime::rfc_3339())
 | 
				
			||||||
        .init();
 | 
					        .init();
 | 
				
			||||||
@@ -123,14 +124,20 @@ async fn main() {
 | 
				
			|||||||
    drop(total_runtime);
 | 
					    drop(total_runtime);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[instrument(skip_all)]
 | 
					#[instrument(skip (db, s3, reqwest))]
 | 
				
			||||||
/// Downloads and crawls and stores a webpage.
 | 
					/// Downloads and crawls and stores a webpage.
 | 
				
			||||||
/// It is acceptable to clone `db`, `reqwest`, and `s3` because they all use `Arc`s internally. - Noted by Oliver 
 | 
					/// It is acceptable to clone `db`, `reqwest`, and `s3` because they all use `Arc`s internally. - Noted by Oliver 
 | 
				
			||||||
async fn get(mut site: Website, db: Surreal<Client>, reqwest: reqwest::Client, s3: S3) {
 | 
					async fn get(mut site: Website, db: Surreal<Client>, reqwest: reqwest::Client, s3: S3) {
 | 
				
			||||||
    trace!("Get: {}", site.to_string());
 | 
					    trace!("Get: {}", site.to_string());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let timer = Timer::start("Built request");
 | 
				
			||||||
 | 
					    let request_builder = reqwest.get(site.to_string());
 | 
				
			||||||
 | 
					    timer.stop();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let timer = Timer::start("Got page");
 | 
					    let timer = Timer::start("Got page");
 | 
				
			||||||
    if let Ok(response) = reqwest.get(site.to_string()).send().await {
 | 
					    if let Ok(response) = request_builder.send().await {
 | 
				
			||||||
        timer.stop();
 | 
					        timer.stop();
 | 
				
			||||||
 | 
					        debug!("Getting body...");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // Get body
 | 
					        // Get body
 | 
				
			||||||
        let data = response.text().await.expect("Failed to read http response's body!");
 | 
					        let data = response.text().await.expect("Failed to read http response's body!");
 | 
				
			||||||
@@ -182,7 +189,13 @@ impl<'a> Timer<'a> {
 | 
				
			|||||||
    pub fn stop(&self) -> f64 {
 | 
					    pub fn stop(&self) -> f64 {
 | 
				
			||||||
        let dif = self.start.elapsed().as_micros();
 | 
					        let dif = self.start.elapsed().as_micros();
 | 
				
			||||||
        let ms = dif as f64 / 1000.;
 | 
					        let ms = dif as f64 / 1000.;
 | 
				
			||||||
        trace!("{}", format!("{} in {:.3}ms", self.msg, ms));
 | 
					
 | 
				
			||||||
 | 
					        if ms > 200. {
 | 
				
			||||||
 | 
					            warn!("{}", format!("{} in {:.3}ms", self.msg, ms));
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            trace!("{}", format!("{} in {:.3}ms", self.msg, ms));
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        ms
 | 
					        ms
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -93,7 +93,6 @@ pub async fn parse(db: &Surreal<Client>, site: &mut Website, data: &str) {
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
        let t = Timer::start("Stored pages");
 | 
					 | 
				
			||||||
        let mut links_to = Vec::new();
 | 
					        let mut links_to = Vec::new();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // this is a 2d vec accidentally
 | 
					        // this is a 2d vec accidentally
 | 
				
			||||||
@@ -108,6 +107,5 @@ pub async fn parse(db: &Surreal<Client>, site: &mut Website, data: &str) {
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        site.links_to(links_to, db).await;
 | 
					        site.links_to(links_to, db).await;
 | 
				
			||||||
        drop(t);
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user