Jank-ish fix, but it sure does work

Make the root record (the one for links such as https://example.com/) use the URL as its record id, which prevents duplicate records when using upsert.
This commit is contained in:
Oliver Atkinson 2024-10-31 15:32:37 -06:00
parent 3a46dd937b
commit 7826c4cec6

View File

@ -6,7 +6,7 @@ use db::connect;
use html5ever::{parse_document, tendril::TendrilSink, tree_builder::TreeBuilderOpts, ParseOpts}; use html5ever::{parse_document, tendril::TendrilSink, tree_builder::TreeBuilderOpts, ParseOpts};
use rcdom::{Node, RcDom}; use rcdom::{Node, RcDom};
use surrealdb::{engine::remote::ws::Client, Surreal}; use surrealdb::{engine::remote::ws::Client, Surreal};
use tracing::{debug, info, warn}; use tracing::{debug, error, info, warn};
mod db; mod db;
@ -46,13 +46,14 @@ async fn get(url: &str) -> Rc<Node> {
async fn walk(node: &rcdom::Handle, db: &Surreal<Client> , site_name: &str) { async fn walk(node: &rcdom::Handle, db: &Surreal<Client> , site_name: &str) {
// Insert Or Update // Insert Or Update
let _created: Vec<db::Record> = match db.upsert("website").content(db::Website { href: String::from("/"), crawled: true, site: site_name.to_string() } ).await { let _: Option<Vec<db::Record>> = match db.upsert(("website", site_name)).content(db::Website { href: String::from("/"), crawled: true, site: site_name.to_string() } ).await {
Ok(e) => { Ok(e) => {
// Return this for type coercion // Return this for type coercion
e e
}, },
Err(e) => { Err(e) => {
unimplemented!("{}", e); // error!("{}", e);
None
} }
}; };
@ -68,14 +69,20 @@ async fn walk(node: &rcdom::Handle, db: &Surreal<Client> , site_name: &str) {
}).await.unwrap(); }).await.unwrap();
warn!("{:?}", created) warn!("{:?}", created)
} else { } else {
let href = attr.value.to_string(); // FIXME this isn't actually creating records...?
info!("{}", href); let _: Option<db::Record> = match db.create("website").content(db::Website {
// Every not-mailto link href: attr.value.to_string(),
let _created: Option<db::Record> = db.create("website").content(db::Website {
href,
crawled: false, crawled: false,
site: site_name.to_string() site: site_name.to_string()
}).await.unwrap(); }).await {
Ok(e) => {
if let Some(a) = &e {
debug!("{:?}", a);
}
e
},
Err(_) => todo!(),
};
} }
} }
}; };