multithreading #2
							
								
								
									
										24
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										24
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							@@ -1987,18 +1987,6 @@ dependencies = [
 | 
				
			|||||||
 "tendril",
 | 
					 "tendril",
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					 | 
				
			||||||
name = "markup5ever_rcdom"
 | 
					 | 
				
			||||||
version = "0.5.0-unofficial"
 | 
					 | 
				
			||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
					 | 
				
			||||||
checksum = "d9cb12459c4cab18dcc580159590f404ad78c0a9c5435ace80288ed43abdce31"
 | 
					 | 
				
			||||||
dependencies = [
 | 
					 | 
				
			||||||
 "html5ever 0.29.0",
 | 
					 | 
				
			||||||
 "markup5ever 0.14.0",
 | 
					 | 
				
			||||||
 "tendril",
 | 
					 | 
				
			||||||
 "xml5ever",
 | 
					 | 
				
			||||||
]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "matchers"
 | 
					name = "matchers"
 | 
				
			||||||
version = "0.1.0"
 | 
					version = "0.1.0"
 | 
				
			||||||
@@ -3653,7 +3641,6 @@ name = "surreal_spider"
 | 
				
			|||||||
version = "0.1.0"
 | 
					version = "0.1.0"
 | 
				
			||||||
dependencies = [
 | 
					dependencies = [
 | 
				
			||||||
 "html5ever 0.29.0",
 | 
					 "html5ever 0.29.0",
 | 
				
			||||||
 "markup5ever_rcdom",
 | 
					 | 
				
			||||||
 "minio",
 | 
					 "minio",
 | 
				
			||||||
 "reqwest",
 | 
					 "reqwest",
 | 
				
			||||||
 "serde",
 | 
					 "serde",
 | 
				
			||||||
@@ -4725,17 +4712,6 @@ version = "0.8.23"
 | 
				
			|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
checksum = "af310deaae937e48a26602b730250b4949e125f468f11e6990be3e5304ddd96f"
 | 
					checksum = "af310deaae937e48a26602b730250b4949e125f468f11e6990be3e5304ddd96f"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					 | 
				
			||||||
name = "xml5ever"
 | 
					 | 
				
			||||||
version = "0.20.0"
 | 
					 | 
				
			||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
					 | 
				
			||||||
checksum = "2278b4bf33071ba8e30368a59436c65eec8e01c49d5c29b3dfeb0cdc45331383"
 | 
					 | 
				
			||||||
dependencies = [
 | 
					 | 
				
			||||||
 "log",
 | 
					 | 
				
			||||||
 "mac",
 | 
					 | 
				
			||||||
 "markup5ever 0.14.0",
 | 
					 | 
				
			||||||
]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "xmltree"
 | 
					name = "xmltree"
 | 
				
			||||||
version = "0.11.0"
 | 
					version = "0.11.0"
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -5,7 +5,6 @@ edition = "2021"
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
[dependencies]
 | 
					[dependencies]
 | 
				
			||||||
html5ever = "0.29.0"
 | 
					html5ever = "0.29.0"
 | 
				
			||||||
markup5ever_rcdom = "0.5.0-unofficial"
 | 
					 | 
				
			||||||
# minio = "0.1.0"
 | 
					# minio = "0.1.0"
 | 
				
			||||||
minio = {git="https://github.com/minio/minio-rs.git", rev = "c28f576"}
 | 
					minio = {git="https://github.com/minio/minio-rs.git", rev = "c28f576"}
 | 
				
			||||||
reqwest = "0.12.9"
 | 
					reqwest = "0.12.9"
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										121
									
								
								src/main.rs
									
									
									
									
									
								
							
							
						
						
									
										121
									
								
								src/main.rs
									
									
									
									
									
								
							@@ -1,19 +1,16 @@
 | 
				
			|||||||
extern crate html5ever;
 | 
					extern crate html5ever;
 | 
				
			||||||
extern crate markup5ever_rcdom as rcdom;
 | 
					
 | 
				
			||||||
 | 
					use std::time::Instant;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use db::{connect, Website};
 | 
					use db::{connect, Website};
 | 
				
			||||||
use html5ever::{
 | 
					 | 
				
			||||||
    local_name, parse_document, tendril::TendrilSink, tree_builder::TreeBuilderOpts, ParseOpts,
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
use rcdom::RcDom;
 | 
					 | 
				
			||||||
use s3::S3;
 | 
					use s3::S3;
 | 
				
			||||||
use std::time::Instant;
 | 
					use surrealdb::{engine::remote::ws::Client, Surreal};
 | 
				
			||||||
use surrealdb::{engine::remote::ws::Client, sql::Thing, Surreal};
 | 
					 | 
				
			||||||
use tracing::{debug, info, instrument, trace, trace_span};
 | 
					use tracing::{debug, info, instrument, trace, trace_span};
 | 
				
			||||||
use tracing_subscriber::EnvFilter;
 | 
					use tracing_subscriber::EnvFilter;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
mod db;
 | 
					mod db;
 | 
				
			||||||
mod s3;
 | 
					mod s3;
 | 
				
			||||||
 | 
					mod parser;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct Config<'a> {
 | 
					struct Config<'a> {
 | 
				
			||||||
    surreal_ns: &'a str,
 | 
					    surreal_ns: &'a str,
 | 
				
			||||||
@@ -39,19 +36,19 @@ async fn main() {
 | 
				
			|||||||
    
 | 
					    
 | 
				
			||||||
    let config = Config {
 | 
					    let config = Config {
 | 
				
			||||||
        surreal_ns: "test",
 | 
					        surreal_ns: "test",
 | 
				
			||||||
        surreal_db: "v1.5",
 | 
					        surreal_db: "v1.7",
 | 
				
			||||||
        surreal_url: "localhost:8000",
 | 
					        surreal_url: "localhost:8000",
 | 
				
			||||||
        surreal_username: "root",
 | 
					        surreal_username: "root",
 | 
				
			||||||
        surreal_password: "root",
 | 
					        surreal_password: "root",
 | 
				
			||||||
        s3_url: "http://localhost:9000",
 | 
					        s3_url: "http://localhost:9000",
 | 
				
			||||||
        s3_bucket: "v1.5",
 | 
					        s3_bucket: "v1.7",
 | 
				
			||||||
        s3_access_key: "8tUJn7e1paMFZQr0PKIT",
 | 
					        s3_access_key: "8tUJn7e1paMFZQr0PKIT",
 | 
				
			||||||
        s3_secret_key: "uSMvYxNOeCejCUgXVqgTfYlUEcmiZY0xcZ91M9E0",
 | 
					        s3_secret_key: "uSMvYxNOeCejCUgXVqgTfYlUEcmiZY0xcZ91M9E0",
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Would probably take these in as parameters from a cli
 | 
					    // Would probably take these in as parameters from a cli
 | 
				
			||||||
    let starting_url = "https://oliveratkinson.net/";
 | 
					    let starting_url = "https://oliveratkinson.net/";
 | 
				
			||||||
    let budget = 200;
 | 
					    let budget = 15;
 | 
				
			||||||
    let mut crawled = 0;
 | 
					    let mut crawled = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let s3 = S3::connect(&config).await.expect("Failed to connect to minio, aborting.");
 | 
					    let s3 = S3::connect(&config).await.expect("Failed to connect to minio, aborting.");
 | 
				
			||||||
@@ -75,11 +72,7 @@ async fn main() {
 | 
				
			|||||||
    let span = trace_span!("Loop");
 | 
					    let span = trace_span!("Loop");
 | 
				
			||||||
    let span = span.enter();
 | 
					    let span = span.enter();
 | 
				
			||||||
    while crawled < budget {
 | 
					    while crawled < budget {
 | 
				
			||||||
        let get_num = if budget - crawled < 100 {
 | 
					        let get_num = if budget - crawled < 100 { budget - crawled } else { 100 };
 | 
				
			||||||
            budget - crawled
 | 
					 | 
				
			||||||
        } else {
 | 
					 | 
				
			||||||
            100
 | 
					 | 
				
			||||||
        };
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        let uncrawled = get_uncrawled_links(&db, get_num).await;
 | 
					        let uncrawled = get_uncrawled_links(&db, get_num).await;
 | 
				
			||||||
        if uncrawled.len() == 0 {
 | 
					        if uncrawled.len() == 0 {
 | 
				
			||||||
@@ -93,6 +86,7 @@ async fn main() {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        for mut site in uncrawled {
 | 
					        for mut site in uncrawled {
 | 
				
			||||||
            get(&mut site, &db, &reqwest, &s3, &mut crawled).await;
 | 
					            get(&mut site, &db, &reqwest, &s3, &mut crawled).await;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            let percent = format!("{:.2}%", (crawled as f32 / budget as f32) * 100f32);
 | 
					            let percent = format!("{:.2}%", (crawled as f32 / budget as f32) * 100f32);
 | 
				
			||||||
            info!("Crawled {crawled} out of {budget} pages. ({percent})");
 | 
					            info!("Crawled {crawled} out of {budget} pages. ({percent})");
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
@@ -103,7 +97,7 @@ async fn main() {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[instrument(skip_all)]
 | 
					#[instrument(skip_all)]
 | 
				
			||||||
/// A quick helper function for downloading a url
 | 
					/// Downloads and crawls and stores a webpage.
 | 
				
			||||||
async fn get(
 | 
					async fn get(
 | 
				
			||||||
    site: &mut Website,
 | 
					    site: &mut Website,
 | 
				
			||||||
    db: &Surreal<Client>,
 | 
					    db: &Surreal<Client>,
 | 
				
			||||||
@@ -113,109 +107,20 @@ async fn get(
 | 
				
			|||||||
) {
 | 
					) {
 | 
				
			||||||
    trace!("Get: {}", site.to_string());
 | 
					    trace!("Get: {}", site.to_string());
 | 
				
			||||||
    let timer = Timer::start("Got page");
 | 
					    let timer = Timer::start("Got page");
 | 
				
			||||||
 | 
					 | 
				
			||||||
    if let Ok(response) = reqwest.get(site.to_string()).send().await {
 | 
					    if let Ok(response) = reqwest.get(site.to_string()).send().await {
 | 
				
			||||||
        timer.stop();
 | 
					        timer.stop();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // Get body
 | 
					        // Get body
 | 
				
			||||||
        let data = response.text().await.unwrap();
 | 
					        let data = response.text().await.unwrap();
 | 
				
			||||||
        let opts = ParseOpts {
 | 
					        // Store document
 | 
				
			||||||
            tree_builder: TreeBuilderOpts {
 | 
					 | 
				
			||||||
                drop_doctype: true,
 | 
					 | 
				
			||||||
                ..Default::default()
 | 
					 | 
				
			||||||
            },
 | 
					 | 
				
			||||||
            ..Default::default()
 | 
					 | 
				
			||||||
        };
 | 
					 | 
				
			||||||
        s3.store(&data, &site.site).await; 
 | 
					        s3.store(&data, &site.site).await; 
 | 
				
			||||||
 | 
					        // Parse document and store relationships
 | 
				
			||||||
        // Get DOM
 | 
					        parser::parse(db, site, data).await;
 | 
				
			||||||
        let dom = parse_document(RcDom::default(), opts)
 | 
					 | 
				
			||||||
            .from_utf8()
 | 
					 | 
				
			||||||
            .read_from(&mut data.as_bytes())
 | 
					 | 
				
			||||||
            .unwrap();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        // TODO save the dom to minio if a flag is set
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        // Modify record in database
 | 
					 | 
				
			||||||
        site.set_crawled();
 | 
					 | 
				
			||||||
        site.store(db).await;
 | 
					 | 
				
			||||||
        trace!("Got: {}", site.to_string());
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        // Walk all the children nodes, searching for links to other pages.
 | 
					 | 
				
			||||||
        let mut buffer = Vec::new();
 | 
					 | 
				
			||||||
        let timer = Timer::start("Walked");
 | 
					 | 
				
			||||||
        walk(&dom.document, &db, &site, &mut buffer).await;
 | 
					 | 
				
			||||||
        timer.stop();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        // Put all the found links into the database.
 | 
					 | 
				
			||||||
        site.links_to(buffer, &db).await;
 | 
					 | 
				
			||||||
        *count += 1;
 | 
					        *count += 1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    trace!("Failed to get: {}", site.to_string());
 | 
					    trace!("Failed to get: {}", site.to_string());
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/// Walks the givin site, placing it's findings in the database
 | 
					 | 
				
			||||||
async fn walk(
 | 
					 | 
				
			||||||
    node: &rcdom::Handle,
 | 
					 | 
				
			||||||
    db: &Surreal<Client>,
 | 
					 | 
				
			||||||
    site: &Website,
 | 
					 | 
				
			||||||
    links_to: &mut Vec<Thing>,
 | 
					 | 
				
			||||||
) {
 | 
					 | 
				
			||||||
    let span = trace_span!("Walk");
 | 
					 | 
				
			||||||
    let span = span.enter();
 | 
					 | 
				
			||||||
    // Match each node - node basically means element.
 | 
					 | 
				
			||||||
    match &node.data {
 | 
					 | 
				
			||||||
        rcdom::NodeData::Element { name, attrs, .. } => {
 | 
					 | 
				
			||||||
            for attr in attrs.borrow().clone() {
 | 
					 | 
				
			||||||
                match name.local {
 | 
					 | 
				
			||||||
                    local_name!("a")
 | 
					 | 
				
			||||||
                    | local_name!("audio")
 | 
					 | 
				
			||||||
                    | local_name!("area")
 | 
					 | 
				
			||||||
                    | local_name!("img")
 | 
					 | 
				
			||||||
                    | local_name!("link")
 | 
					 | 
				
			||||||
                    | local_name!("object")
 | 
					 | 
				
			||||||
                    | local_name!("source")
 | 
					 | 
				
			||||||
                    | local_name!("base")
 | 
					 | 
				
			||||||
                    | local_name!("video") => {
 | 
					 | 
				
			||||||
                        let attribute_name = attr.name.local.to_string();
 | 
					 | 
				
			||||||
                        if attribute_name == "src"
 | 
					 | 
				
			||||||
                            || attribute_name == "href"
 | 
					 | 
				
			||||||
                            || attribute_name == "data"
 | 
					 | 
				
			||||||
                        {
 | 
					 | 
				
			||||||
                            // Get clone of the current site object
 | 
					 | 
				
			||||||
                            let mut web = site.clone();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                            // Set url
 | 
					 | 
				
			||||||
                            let mut url = web.site;
 | 
					 | 
				
			||||||
                            url.set_fragment(None); // removes #xyz
 | 
					 | 
				
			||||||
                            let joined = url.join(&attr.value).unwrap();
 | 
					 | 
				
			||||||
                            web.site = joined;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                            // Set other attributes
 | 
					 | 
				
			||||||
                            web.crawled = false;
 | 
					 | 
				
			||||||
                            // TODO set element name
 | 
					 | 
				
			||||||
                            // let element_name = name.local.to_string();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                            if let Some(id) = web.store(db).await {
 | 
					 | 
				
			||||||
                                links_to.push(id);
 | 
					 | 
				
			||||||
                            }
 | 
					 | 
				
			||||||
                        }
 | 
					 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
                    local_name!("button") | local_name!("meta") | local_name!("iframe") => {
 | 
					 | 
				
			||||||
                        // dbg!(attrs);
 | 
					 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
                    _ => {}
 | 
					 | 
				
			||||||
                };
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        _ => {}
 | 
					 | 
				
			||||||
    };
 | 
					 | 
				
			||||||
    drop(span);
 | 
					 | 
				
			||||||
    for child in node.children.borrow().iter() {
 | 
					 | 
				
			||||||
        Box::pin(walk(child, db, site, links_to)).await;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/// Returns uncrawled links
 | 
					/// Returns uncrawled links
 | 
				
			||||||
async fn get_uncrawled_links(db: &Surreal<Client>, mut count: usize) -> Vec<Website> {
 | 
					async fn get_uncrawled_links(db: &Surreal<Client>, mut count: usize) -> Vec<Website> {
 | 
				
			||||||
    if count > 100 {
 | 
					    if count > 100 {
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										95
									
								
								src/parser.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										95
									
								
								src/parser.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,95 @@
 | 
				
			|||||||
 | 
					use std::default::Default;
 | 
				
			||||||
 | 
					use std::str::FromStr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use html5ever::tokenizer::{BufferQueue, TokenizerResult};
 | 
				
			||||||
 | 
					use html5ever::tokenizer::{StartTag, TagToken};
 | 
				
			||||||
 | 
					use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts};
 | 
				
			||||||
 | 
					use html5ever::{local_name, tendril::*};
 | 
				
			||||||
 | 
					use surrealdb::engine::remote::ws::Client;
 | 
				
			||||||
 | 
					use surrealdb::Surreal;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use crate::db::Website;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Clone)]
 | 
				
			||||||
 | 
					struct LinkParser<'a> {
 | 
				
			||||||
 | 
					    site: &'a Website,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl TokenSink for LinkParser<'_> {
 | 
				
			||||||
 | 
					    type Handle = Vec<Website>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<Self::Handle> {
 | 
				
			||||||
 | 
					        match token {
 | 
				
			||||||
 | 
					            TagToken(tag) => {
 | 
				
			||||||
 | 
					                if tag.kind == StartTag {
 | 
				
			||||||
 | 
					                    match tag.name {
 | 
				
			||||||
 | 
					                        local_name!("a")
 | 
				
			||||||
 | 
					                        | local_name!("audio")
 | 
				
			||||||
 | 
					                        | local_name!("area")
 | 
				
			||||||
 | 
					                        | local_name!("img")
 | 
				
			||||||
 | 
					                        | local_name!("link")
 | 
				
			||||||
 | 
					                        | local_name!("object")
 | 
				
			||||||
 | 
					                        | local_name!("source")
 | 
				
			||||||
 | 
					                        | local_name!("base")
 | 
				
			||||||
 | 
					                        | local_name!("video") => {
 | 
				
			||||||
 | 
					                            let mut links = Vec::new();
 | 
				
			||||||
 | 
					                            for attr in &tag.attrs {
 | 
				
			||||||
 | 
					                                let attr_name = attr.name.local.to_string();
 | 
				
			||||||
 | 
					                                if attr_name == "src" || attr_name == "href" || attr_name == "data"
 | 
				
			||||||
 | 
					                                {
 | 
				
			||||||
 | 
					                                    // Get clone of the current site object
 | 
				
			||||||
 | 
					                                    let mut web = self.site.clone();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                                    // Set url
 | 
				
			||||||
 | 
					                                    let mut url = web.site;
 | 
				
			||||||
 | 
					                                    url.set_fragment(None); // removes #xyz
 | 
				
			||||||
 | 
					                                    let joined = url.join(&attr.value).unwrap();
 | 
				
			||||||
 | 
					                                    web.site = joined;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                                    web.crawled = false;
 | 
				
			||||||
 | 
					                                    
 | 
				
			||||||
 | 
					                                    links.push(web);
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                            
 | 
				
			||||||
 | 
					                            return TokenSinkResult::Script(links);
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                        local_name!("button") | local_name!("meta") | local_name!("iframe") => {
 | 
				
			||||||
 | 
					                            // dbg!(attrs);
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                        _ => {}
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            _ => {}
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        TokenSinkResult::Continue
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn parse(db: &Surreal<Client>, site: &mut Website, data: String) {
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    site.set_crawled();
 | 
				
			||||||
 | 
					    site.store(db).await;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    let sink = LinkParser { site };
 | 
				
			||||||
 | 
					    let chunk = Tendril::from_str(&data).unwrap();
 | 
				
			||||||
 | 
					    let mut input = BufferQueue::default();
 | 
				
			||||||
 | 
					    input.push_back(chunk.try_reinterpret::<fmt::UTF8>().unwrap());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let token = Tokenizer::new(sink.clone(), TokenizerOpts::default());
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    let mut links_to = Vec::new();
 | 
				
			||||||
 | 
					    while !input.is_empty() {
 | 
				
			||||||
 | 
					        if let TokenizerResult::Script(s) = token.feed(&mut input) {
 | 
				
			||||||
 | 
					            for mut web in s {
 | 
				
			||||||
 | 
					                if let Some(id) = web.store(db).await {
 | 
				
			||||||
 | 
					                    links_to.push(id);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    sink.site.links_to(links_to, db).await;
 | 
				
			||||||
 | 
					    assert!(input.is_empty());
 | 
				
			||||||
 | 
					    token.end();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Reference in New Issue
	
	Block a user