unified settings for testing
This commit is contained in:
		
							
								
								
									
										18
									
								
								src/main.rs
									
									
									
									
									
								
							
							
						
						
									
										18
									
								
								src/main.rs
									
									
									
									
									
								
							@@ -27,28 +27,30 @@ struct Config<'a> {
 | 
			
		||||
 | 
			
		||||
#[tokio::main]
 | 
			
		||||
async fn main() {
 | 
			
		||||
    let total_runtime = Timer::start("Completed");
 | 
			
		||||
 | 
			
		||||
    tracing_subscriber::fmt()
 | 
			
		||||
        .with_env_filter(EnvFilter::from_default_env())
 | 
			
		||||
        .with_line_number(true)
 | 
			
		||||
        .without_time()
 | 
			
		||||
        // .without_time()
 | 
			
		||||
        .init();
 | 
			
		||||
    debug!("Starting...");
 | 
			
		||||
    
 | 
			
		||||
    let config = Config {
 | 
			
		||||
        surreal_ns: "test",
 | 
			
		||||
        surreal_db: "v1.7",
 | 
			
		||||
        surreal_url: "localhost:8000",
 | 
			
		||||
        surreal_username: "root",
 | 
			
		||||
        surreal_password: "root",
 | 
			
		||||
        surreal_ns: "test",
 | 
			
		||||
        surreal_db: "custom-engine-v2",
 | 
			
		||||
        s3_bucket: "custom-engine-v2",
 | 
			
		||||
        s3_url: "http://localhost:9000",
 | 
			
		||||
        s3_bucket: "v1.7",
 | 
			
		||||
        s3_access_key: "8tUJn7e1paMFZQr0PKIT",
 | 
			
		||||
        s3_secret_key: "uSMvYxNOeCejCUgXVqgTfYlUEcmiZY0xcZ91M9E0",
 | 
			
		||||
        s3_access_key: "0zv7GbLQsw4ZI8TclMps",
 | 
			
		||||
        s3_secret_key: "5dB7QkGFw7fYbUJ5LpHk2GbWR7Bl710HlRz4NbzB",
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    // Would probably take these in as parameters from a cli
 | 
			
		||||
    let starting_url = "https://oliveratkinson.net/";
 | 
			
		||||
    let budget = 15;
 | 
			
		||||
    let budget = 5;
 | 
			
		||||
    let mut crawled = 0;
 | 
			
		||||
 | 
			
		||||
    let s3 = S3::connect(&config).await.expect("Failed to connect to minio, aborting.");
 | 
			
		||||
@@ -94,6 +96,7 @@ async fn main() {
 | 
			
		||||
    drop(span);
 | 
			
		||||
 | 
			
		||||
    info!("Done");
 | 
			
		||||
    drop(total_runtime);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[instrument(skip_all)]
 | 
			
		||||
@@ -117,6 +120,7 @@ async fn get(
 | 
			
		||||
        // Parse document and store relationships
 | 
			
		||||
        parser::parse(db, site, data).await;
 | 
			
		||||
        *count += 1;
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    trace!("Failed to get: {}", site.to_string());
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -7,8 +7,10 @@ use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, Tokenizer, Tokeniz
 | 
			
		||||
use html5ever::{local_name, tendril::*};
 | 
			
		||||
use surrealdb::engine::remote::ws::Client;
 | 
			
		||||
use surrealdb::Surreal;
 | 
			
		||||
use tracing::instrument;
 | 
			
		||||
 | 
			
		||||
use crate::db::Website;
 | 
			
		||||
use crate::Timer;
 | 
			
		||||
 | 
			
		||||
#[derive(Clone)]
 | 
			
		||||
struct LinkParser<'a> {
 | 
			
		||||
@@ -67,6 +69,7 @@ impl TokenSink for LinkParser<'_> {
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[instrument(skip_all)]
 | 
			
		||||
pub async fn parse(db: &Surreal<Client>, site: &mut Website, data: String) {
 | 
			
		||||
    
 | 
			
		||||
    site.set_crawled();
 | 
			
		||||
@@ -79,6 +82,7 @@ pub async fn parse(db: &Surreal<Client>, site: &mut Website, data: String) {
 | 
			
		||||
 | 
			
		||||
    let token = Tokenizer::new(sink.clone(), TokenizerOpts::default());
 | 
			
		||||
    
 | 
			
		||||
    let t = Timer::start("Stored pages");
 | 
			
		||||
    let mut links_to = Vec::new();
 | 
			
		||||
    while !input.is_empty() {
 | 
			
		||||
        if let TokenizerResult::Script(s) = token.feed(&mut input) {
 | 
			
		||||
@@ -89,6 +93,7 @@ pub async fn parse(db: &Surreal<Client>, site: &mut Website, data: String) {
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    drop(t);
 | 
			
		||||
    sink.site.links_to(links_to, db).await;
 | 
			
		||||
    assert!(input.is_empty());
 | 
			
		||||
    token.end();
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user