fixed tracing
This commit is contained in:
parent
b7540a4680
commit
bd0b946245
@ -8,7 +8,7 @@ base64 = "0.22.1"
|
|||||||
html5ever = "0.29"
|
html5ever = "0.29"
|
||||||
# minio = "0.1.0"
|
# minio = "0.1.0"
|
||||||
minio = {git="https://github.com/minio/minio-rs.git", rev = "c28f576"}
|
minio = {git="https://github.com/minio/minio-rs.git", rev = "c28f576"}
|
||||||
reqwest = { version = "0.12", features = ["gzip"] }
|
reqwest = { version = "0.12", features = ["gzip", "default", "rustls-tls"] }
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
surrealdb = "2.2"
|
surrealdb = "2.2"
|
||||||
tokio = { version="1.41.0", features = ["full"] }
|
tokio = { version="1.41.0", features = ["full"] }
|
||||||
|
13
src/db.rs
13
src/db.rs
@ -1,3 +1,4 @@
|
|||||||
|
use std::fmt::Debug;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use surrealdb::{
|
use surrealdb::{
|
||||||
engine::remote::ws::{Client, Ws}, error::Db, opt::auth::Root, sql::Thing, Response, Surreal
|
engine::remote::ws::{Client, Ws}, error::Db, opt::auth::Root, sql::Thing, Response, Surreal
|
||||||
@ -7,7 +8,7 @@ use url::Url;
|
|||||||
|
|
||||||
use crate::{Config, Timer};
|
use crate::{Config, Timer};
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
#[derive(Serialize, Deserialize, Clone)]
|
||||||
pub struct Website {
|
pub struct Website {
|
||||||
/// The url that this data is found at
|
/// The url that this data is found at
|
||||||
pub site: Url,
|
pub site: Url,
|
||||||
@ -17,6 +18,14 @@ pub struct Website {
|
|||||||
id: Option<Thing>,
|
id: Option<Thing>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Manual `Debug` impl (replacing the derive) to make tracing output look nicer:
// only a shortened form of the `site` URL is printed instead of every field.
|
||||||
|
impl Debug for Website {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
// Domain of the URL (or "n/a" when `domain()` returns `None`) followed by the URL path.
let site = (self.site.domain().unwrap_or("n/a")).to_string() + self.site.path();
|
||||||
|
// Emit a `Website { site: .. }` debug struct containing just that shortened string.
f.debug_struct("Website").field("site", &site).finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Website {
|
impl Website {
|
||||||
/// Creates a blank site (assumes that url param is site's root)
|
/// Creates a blank site (assumes that url param is site's root)
|
||||||
pub fn new(url: &str, crawled: bool) -> Self {
|
pub fn new(url: &str, crawled: bool) -> Self {
|
||||||
@ -80,6 +89,8 @@ impl Website {
|
|||||||
|
|
||||||
#[instrument(skip_all)]
|
#[instrument(skip_all)]
|
||||||
pub async fn store(&self, db: &Surreal<Client>) -> Option<Thing> {
|
pub async fn store(&self, db: &Surreal<Client>) -> Option<Thing> {
|
||||||
|
let t = Timer::start("Stored page");
|
||||||
|
let _ = t;
|
||||||
// check if it's been gone thru before
|
// check if it's been gone thru before
|
||||||
let mut response = db
|
let mut response = db
|
||||||
.query("SELECT * FROM ONLY website WHERE site = $site LIMIT 1")
|
.query("SELECT * FROM ONLY website WHERE site = $site LIMIT 1")
|
||||||
|
21
src/main.rs
21
src/main.rs
@ -9,7 +9,7 @@ use db::{connect, Website};
|
|||||||
use s3::S3;
|
use s3::S3;
|
||||||
use surrealdb::{engine::remote::ws::Client, Surreal};
|
use surrealdb::{engine::remote::ws::Client, Surreal};
|
||||||
use tokio::task::JoinSet;
|
use tokio::task::JoinSet;
|
||||||
use tracing::{debug, info, instrument, trace, trace_span};
|
use tracing::{debug, info, instrument, trace, trace_span, warn};
|
||||||
use tracing_subscriber::{fmt::time::LocalTime, EnvFilter};
|
use tracing_subscriber::{fmt::time::LocalTime, EnvFilter};
|
||||||
|
|
||||||
mod db;
|
mod db;
|
||||||
@ -36,6 +36,7 @@ async fn main() {
|
|||||||
tracing_subscriber::fmt()
|
tracing_subscriber::fmt()
|
||||||
.with_env_filter(EnvFilter::from_default_env())
|
.with_env_filter(EnvFilter::from_default_env())
|
||||||
.with_line_number(true)
|
.with_line_number(true)
|
||||||
|
.with_thread_ids(true)
|
||||||
.with_file(true)
|
.with_file(true)
|
||||||
.with_timer(LocalTime::rfc_3339())
|
.with_timer(LocalTime::rfc_3339())
|
||||||
.init();
|
.init();
|
||||||
@ -123,14 +124,20 @@ async fn main() {
|
|||||||
drop(total_runtime);
|
drop(total_runtime);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[instrument(skip_all)]
|
#[instrument(skip (db, s3, reqwest))]
|
||||||
/// Downloads and crawls and stores a webpage.
|
/// Downloads and crawls and stores a webpage.
|
||||||
/// It is acceptable to clone `db`, `reqwest`, and `s3` because they all use `Arc`s internally. - Noted by Oliver
|
/// It is acceptable to clone `db`, `reqwest`, and `s3` because they all use `Arc`s internally. - Noted by Oliver
|
||||||
async fn get(mut site: Website, db: Surreal<Client>, reqwest: reqwest::Client, s3: S3) {
|
async fn get(mut site: Website, db: Surreal<Client>, reqwest: reqwest::Client, s3: S3) {
|
||||||
trace!("Get: {}", site.to_string());
|
trace!("Get: {}", site.to_string());
|
||||||
|
|
||||||
|
let timer = Timer::start("Built request");
|
||||||
|
let request_builder = reqwest.get(site.to_string());
|
||||||
|
timer.stop();
|
||||||
|
|
||||||
let timer = Timer::start("Got page");
|
let timer = Timer::start("Got page");
|
||||||
if let Ok(response) = reqwest.get(site.to_string()).send().await {
|
if let Ok(response) = request_builder.send().await {
|
||||||
timer.stop();
|
timer.stop();
|
||||||
|
debug!("Getting body...");
|
||||||
|
|
||||||
// Get body
|
// Get body
|
||||||
let data = response.text().await.expect("Failed to read http response's body!");
|
let data = response.text().await.expect("Failed to read http response's body!");
|
||||||
@ -182,7 +189,13 @@ impl<'a> Timer<'a> {
|
|||||||
/// Logs the time elapsed since `self.start`, prefixed with `self.msg`, and returns it in milliseconds.
pub fn stop(&self) -> f64 {
|
pub fn stop(&self) -> f64 {
|
||||||
// Elapsed time since `self.start`, in whole microseconds.
let dif = self.start.elapsed().as_micros();
|
let dif = self.start.elapsed().as_micros();
|
||||||
// Convert microseconds to fractional milliseconds.
let ms = dif as f64 / 1000.;
|
let ms = dif as f64 / 1000.;
|
||||||
trace!("{}", format!("{} in {:.3}ms", self.msg, ms));
|
|
||||||
|
// Anything that took longer than 200ms is logged at `warn` level; everything else stays at `trace`.
if ms > 200. {
|
||||||
|
warn!("{}", format!("{} in {:.3}ms", self.msg, ms));
|
||||||
|
} else {
|
||||||
|
trace!("{}", format!("{} in {:.3}ms", self.msg, ms));
|
||||||
|
}
|
||||||
|
|
||||||
// Return the measured duration (milliseconds) to the caller.
ms
|
ms
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -93,7 +93,6 @@ pub async fn parse(db: &Surreal<Client>, site: &mut Website, data: &str) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
let t = Timer::start("Stored pages");
|
|
||||||
let mut links_to = Vec::new();
|
let mut links_to = Vec::new();
|
||||||
|
|
||||||
// this is a 2d vec accidentally
|
// this is a 2d vec accidentally
|
||||||
@ -108,6 +107,5 @@ pub async fn parse(db: &Surreal<Client>, site: &mut Website, data: &str) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
site.links_to(links_to, db).await;
|
site.links_to(links_to, db).await;
|
||||||
drop(t);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user