jank-ish fix but it sure does work

Make the root record (the one for a site's root link, e.g. https://example.com/) use the URL as its record ID, which prevents duplicate records from being created when using upsert.
This commit is contained in:
Oliver Atkinson 2024-10-31 15:32:37 -06:00
parent 3a46dd937b
commit 7826c4cec6

View File

@ -6,7 +6,7 @@ use db::connect;
use html5ever::{parse_document, tendril::TendrilSink, tree_builder::TreeBuilderOpts, ParseOpts};
use rcdom::{Node, RcDom};
use surrealdb::{engine::remote::ws::Client, Surreal};
use tracing::{debug, info, warn};
use tracing::{debug, error, info, warn};
mod db;
@ -46,13 +46,14 @@ async fn get(url: &str) -> Rc<Node> {
async fn walk(node: &rcdom::Handle, db: &Surreal<Client> , site_name: &str) {
// Insert Or Update
let _created: Vec<db::Record> = match db.upsert("website").content(db::Website { href: String::from("/"), crawled: true, site: site_name.to_string() } ).await {
let _: Option<Vec<db::Record>> = match db.upsert(("website", site_name)).content(db::Website { href: String::from("/"), crawled: true, site: site_name.to_string() } ).await {
Ok(e) => {
// Return this for type coercion
e
},
Err(e) => {
unimplemented!("{}", e);
// error!("{}", e);
None
}
};
@ -68,14 +69,20 @@ async fn walk(node: &rcdom::Handle, db: &Surreal<Client> , site_name: &str) {
}).await.unwrap();
warn!("{:?}", created)
} else {
let href = attr.value.to_string();
info!("{}", href);
// Every not-mailto link
let _created: Option<db::Record> = db.create("website").content(db::Website {
href,
// FIXME this isn't actually creating records...?
let _: Option<db::Record> = match db.create("website").content(db::Website {
href: attr.value.to_string(),
crawled: false,
site: site_name.to_string()
}).await.unwrap();
}).await {
Ok(e) => {
if let Some(a) = &e {
debug!("{:?}", a);
}
e
},
Err(_) => todo!(),
};
}
}
};