internet_mapper/src/main.rs

59 lines
1.7 KiB
Rust
Raw Normal View History

extern crate markup5ever_rcdom as rcdom;
extern crate html5ever;
2024-08-23 11:22:49 +00:00
use std::env;
use html5ever::{parse_document, tendril::TendrilSink, tree_builder::TreeBuilderOpts, ParseOpts};
use rcdom::RcDom;
use tracing::{debug, info, trace, warn};
2024-08-23 11:22:49 +00:00
#[tokio::main]
async fn main() {
tracing_subscriber::fmt::init();
debug!("Starting...");
let url = "https://oliveratkinson.net";
let budget = "10";
let response = reqwest::get(url).await.unwrap();
let data = response.text().await.unwrap();
let opts = ParseOpts {
tree_builder: TreeBuilderOpts {
drop_doctype: true,
..Default::default()
},
..Default::default()
2024-08-25 21:50:59 +00:00
};
2024-08-23 11:22:49 +00:00
let dom = parse_document(RcDom::default(), opts)
.from_utf8()
.read_from(&mut data.as_bytes())
.unwrap();
2024-08-23 11:22:49 +00:00
let a = &dom.document;
warn!("Walking...");
2024-08-25 21:50:59 +00:00
walk(a);
}
2024-08-25 21:50:59 +00:00
/// Recursively visits `node` and all of its descendants, logging what it finds
/// at debug level.
///
/// For each element, one line is logged per attribute, containing the element's
/// local name and the attribute's value. Doctype, comment and
/// processing-instruction nodes are logged with a fixed marker. Document and
/// text nodes produce no output. Only `node.children` is followed.
fn walk(node: &rcdom::Handle) {
    match &node.data {
        rcdom::NodeData::Document | rcdom::NodeData::Text { .. } => {}
        rcdom::NodeData::Doctype { .. } => debug!("doctype"),
        rcdom::NodeData::Comment { .. } => debug!("comment"),
        rcdom::NodeData::Element { name, attrs, .. } => {
            // The element name is the same for every attribute, so resolve it
            // once here instead of allocating a new String per attribute.
            let name: &str = &name.local;
            for attr in attrs.borrow().iter() {
                let internal = &*attr.value;
                debug!("element: {name}, attr: {internal}");
            }
        }
        rcdom::NodeData::ProcessingInstruction { .. } => debug!("ProcessingInstruction"),
    }

    for child in node.children.borrow().iter() {
        walk(child);
    }
}