diff --git a/src/main.rs b/src/main.rs index f19cd84..8449efc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -27,28 +27,30 @@ struct Config<'a> { #[tokio::main] async fn main() { + let total_runtime = Timer::start("Completed"); + tracing_subscriber::fmt() .with_env_filter(EnvFilter::from_default_env()) .with_line_number(true) - .without_time() + // .without_time() .init(); debug!("Starting..."); let config = Config { - surreal_ns: "test", - surreal_db: "v1.7", surreal_url: "localhost:8000", surreal_username: "root", surreal_password: "root", + surreal_ns: "test", + surreal_db: "custom-engine-v2", + s3_bucket: "custom-engine-v2", s3_url: "http://localhost:9000", - s3_bucket: "v1.7", - s3_access_key: "8tUJn7e1paMFZQr0PKIT", - s3_secret_key: "uSMvYxNOeCejCUgXVqgTfYlUEcmiZY0xcZ91M9E0", + s3_access_key: "0zv7GbLQsw4ZI8TclMps", + s3_secret_key: "5dB7QkGFw7fYbUJ5LpHk2GbWR7Bl710HlRz4NbzB", }; // Would probably take these in as parameters from a cli let starting_url = "https://oliveratkinson.net/"; - let budget = 15; + let budget = 5; let mut crawled = 0; let s3 = S3::connect(&config).await.expect("Failed to connect to minio, aborting."); @@ -94,6 +96,7 @@ async fn main() { drop(span); info!("Done"); + drop(total_runtime); } #[instrument(skip_all)] @@ -117,6 +120,7 @@ async fn get( // Parse document and store relationships parser::parse(db, site, data).await; *count += 1; + return; } trace!("Failed to get: {}", site.to_string()); } diff --git a/src/parser.rs b/src/parser.rs index 17143f6..09d1dff 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -7,8 +7,10 @@ use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, Tokenizer, Tokeniz use html5ever::{local_name, tendril::*}; use surrealdb::engine::remote::ws::Client; use surrealdb::Surreal; +use tracing::instrument; use crate::db::Website; +use crate::Timer; #[derive(Clone)] struct LinkParser<'a> { @@ -67,6 +69,7 @@ impl TokenSink for LinkParser<'_> { } } +#[instrument(skip_all)] pub async fn parse(db: &Surreal, site: &mut Website, data: String) { site.set_crawled(); @@ -79,6 +82,7 @@ pub async fn parse(db: &Surreal, site: &mut Website, data: String) { let token = Tokenizer::new(sink.clone(), TokenizerOpts::default()); + let t = Timer::start("Stored pages"); let mut links_to = Vec::new(); while !input.is_empty() { if let TokenizerResult::Script(s) = token.feed(&mut input) { @@ -89,6 +93,7 @@ pub async fn parse(db: &Surreal, site: &mut Website, data: String) { } } } + drop(t); sink.site.links_to(links_to, db).await; assert!(input.is_empty()); token.end();