clean up walk()
This commit is contained in:
		@@ -3,7 +3,8 @@ use surrealdb::{engine::remote::ws::{Client, Ws}, opt::auth::Root, sql::Thing, S
 | 
			
		||||
 | 
			
		||||
#[derive(Debug, Serialize)]
 | 
			
		||||
pub struct Website {
 | 
			
		||||
    pub url: String,
 | 
			
		||||
    pub site: String,
 | 
			
		||||
    pub href: String,
 | 
			
		||||
    pub crawled: bool
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										48
									
								
								src/main.rs
									
									
									
									
									
								
							
							
						
						
									
										48
									
								
								src/main.rs
									
									
									
									
									
								
							@@ -45,10 +45,8 @@ async fn get(url: &str) -> Rc<Node> {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
async fn walk(node: &rcdom::Handle, db: &Surreal<Client> , site_name: &str) {
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    // Should potentially just check for the record first.
 | 
			
		||||
    let created: Option<db::Record> = match db.create("website").content(db::Website { url: site_name.to_string(), crawled: true } ).await {
 | 
			
		||||
    let created: Option<db::Record> = match db.create("website").content(db::Website { href: String::from("/"), crawled: true, site: site_name.to_string() } ).await {
 | 
			
		||||
        Ok(e) => e,
 | 
			
		||||
        Err(e) => {
 | 
			
		||||
            match e {
 | 
			
		||||
@@ -65,57 +63,33 @@ async fn walk(node: &rcdom::Handle, db: &Surreal<Client> , site_name: &str) {
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    };
 | 
			
		||||
    info!{"{:?}", created};
 | 
			
		||||
 | 
			
		||||
    match &node.data {
 | 
			
		||||
        rcdom::NodeData::Document => (),
 | 
			
		||||
        rcdom::NodeData::ProcessingInstruction { target, contents } => debug!("ProcessingInstruction"),
 | 
			
		||||
        rcdom::NodeData::Doctype { name, public_id, system_id } => debug!("doctype"),
 | 
			
		||||
        rcdom::NodeData::Text { contents } => {},
 | 
			
		||||
        rcdom::NodeData::Comment { contents } => debug!("comment"),
 | 
			
		||||
        rcdom::NodeData::Element { name, attrs, template_contents, mathml_annotation_xml_integration_point } => {
 | 
			
		||||
            let attrs = attrs.borrow().clone();
 | 
			
		||||
            for attr in attrs {
 | 
			
		||||
            for attr in attrs.borrow().clone() {
 | 
			
		||||
                let name = name.local.to_string();
 | 
			
		||||
                let link = &*attr.value;
 | 
			
		||||
                trace!("element: {name}, attr: {link}"); 
 | 
			
		||||
 | 
			
		||||
                if name == "a" {
 | 
			
		||||
                    if link.starts_with("mailto") {
 | 
			
		||||
                    if attr.value.starts_with("mailto") {
 | 
			
		||||
                        // mailto link, lol
 | 
			
		||||
                        warn!("{link}");
 | 
			
		||||
                        
 | 
			
		||||
                        let created: Option<db::Record> = db.create("email").content(db::Email { email: link.to_owned() }).await.unwrap();
 | 
			
		||||
                        let created: Option<db::Record> = db.create("email").content(db::Email { email: attr.value.to_string() }).await.unwrap();
 | 
			
		||||
                        info!("{:?}", created)
 | 
			
		||||
 | 
			
		||||
                    } else if link.starts_with("http") {
 | 
			
		||||
                        // normal link
 | 
			
		||||
                        debug!("{link}")
 | 
			
		||||
                    } else if link.contains("/") {
 | 
			
		||||
                        // possibly a relative link?
 | 
			
		||||
                        //
 | 
			
		||||
                        // TODO This needs more logic handling. Needs to handle the following cases:
 | 
			
		||||
                        //
 | 
			
		||||
                        // Absolute links:
 | 
			
		||||
                        // /img.png
 | 
			
		||||
                        // /file-no-extension
 | 
			
		||||
                        //
 | 
			
		||||
                        // Relative Links:
 | 
			
		||||
                        //
 | 
			
		||||
                        // img.png
 | 
			
		||||
                        // file-no-extension
 | 
			
		||||
                        //
 | 
			
		||||
                        let link_name = format!("{site_name}/{link}");
 | 
			
		||||
                        debug!("{link_name}");
 | 
			
		||||
                        let created: Option<db::Record> = db.create("website").content(db::Website { url: link_name, crawled: false } ).await.unwrap();
 | 
			
		||||
                    } else {
 | 
			
		||||
                        error!("Unhandled link type: {link}")
 | 
			
		||||
                        // Every not-mailto link
 | 
			
		||||
                        let created: Option<db::Record> = db.create("website").content(db::Website { href: attr.value.to_string(), crawled: false, site: site_name.to_string() } ).await.unwrap();
 | 
			
		||||
                        info!{"{:?}", created};
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            };
 | 
			
		||||
        },
 | 
			
		||||
        rcdom::NodeData::ProcessingInstruction { target, contents } => debug!("ProcessingInstruction"),
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    for child in &*node.children.borrow() {
 | 
			
		||||
        Box::pin(walk(&child, db, site_name)).await; 
 | 
			
		||||
    for child in node.children.borrow().iter() {
 | 
			
		||||
        Box::pin(walk(child, db, site_name)).await; 
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user