works more, but still not all the way
This commit is contained in:
		
							
								
								
									
										3
									
								
								.vscode/launch.json
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.vscode/launch.json
									
									
									
									
										vendored
									
									
								
							@@ -8,6 +8,9 @@
 | 
			
		||||
            "type": "lldb",
 | 
			
		||||
            "request": "launch",
 | 
			
		||||
            "name": "Debug executable 'surreal_spider'",
 | 
			
		||||
            "env": {
 | 
			
		||||
                "RUST_LOG": "surreal_spider=trace",
 | 
			
		||||
            },
 | 
			
		||||
            "cargo": {
 | 
			
		||||
                "args": [
 | 
			
		||||
                    "build",
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										316
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										316
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							@@ -829,6 +829,17 @@ dependencies = [
 | 
			
		||||
 "winapi",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "displaydoc"
 | 
			
		||||
version = "0.2.5"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "proc-macro2",
 | 
			
		||||
 "quote",
 | 
			
		||||
 "syn 2.0.85",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "dmp"
 | 
			
		||||
version = "0.2.0"
 | 
			
		||||
@@ -1438,6 +1449,124 @@ dependencies = [
 | 
			
		||||
 "cc",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "icu_collections"
 | 
			
		||||
version = "1.5.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "displaydoc",
 | 
			
		||||
 "yoke",
 | 
			
		||||
 "zerofrom",
 | 
			
		||||
 "zerovec",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "icu_locid"
 | 
			
		||||
version = "1.5.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "displaydoc",
 | 
			
		||||
 "litemap",
 | 
			
		||||
 "tinystr",
 | 
			
		||||
 "writeable",
 | 
			
		||||
 "zerovec",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "icu_locid_transform"
 | 
			
		||||
version = "1.5.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "displaydoc",
 | 
			
		||||
 "icu_locid",
 | 
			
		||||
 "icu_locid_transform_data",
 | 
			
		||||
 "icu_provider",
 | 
			
		||||
 "tinystr",
 | 
			
		||||
 "zerovec",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "icu_locid_transform_data"
 | 
			
		||||
version = "1.5.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "icu_normalizer"
 | 
			
		||||
version = "1.5.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "displaydoc",
 | 
			
		||||
 "icu_collections",
 | 
			
		||||
 "icu_normalizer_data",
 | 
			
		||||
 "icu_properties",
 | 
			
		||||
 "icu_provider",
 | 
			
		||||
 "smallvec",
 | 
			
		||||
 "utf16_iter",
 | 
			
		||||
 "utf8_iter",
 | 
			
		||||
 "write16",
 | 
			
		||||
 "zerovec",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "icu_normalizer_data"
 | 
			
		||||
version = "1.5.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "icu_properties"
 | 
			
		||||
version = "1.5.1"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "displaydoc",
 | 
			
		||||
 "icu_collections",
 | 
			
		||||
 "icu_locid_transform",
 | 
			
		||||
 "icu_properties_data",
 | 
			
		||||
 "icu_provider",
 | 
			
		||||
 "tinystr",
 | 
			
		||||
 "zerovec",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "icu_properties_data"
 | 
			
		||||
version = "1.5.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "icu_provider"
 | 
			
		||||
version = "1.5.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "displaydoc",
 | 
			
		||||
 "icu_locid",
 | 
			
		||||
 "icu_provider_macros",
 | 
			
		||||
 "stable_deref_trait",
 | 
			
		||||
 "tinystr",
 | 
			
		||||
 "writeable",
 | 
			
		||||
 "yoke",
 | 
			
		||||
 "zerofrom",
 | 
			
		||||
 "zerovec",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "icu_provider_macros"
 | 
			
		||||
version = "1.5.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "proc-macro2",
 | 
			
		||||
 "quote",
 | 
			
		||||
 "syn 2.0.85",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "ident_case"
 | 
			
		||||
version = "1.0.1"
 | 
			
		||||
@@ -1446,12 +1575,23 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "idna"
 | 
			
		||||
version = "0.5.0"
 | 
			
		||||
version = "1.0.3"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6"
 | 
			
		||||
checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "unicode-bidi",
 | 
			
		||||
 "unicode-normalization",
 | 
			
		||||
 "idna_adapter",
 | 
			
		||||
 "smallvec",
 | 
			
		||||
 "utf8_iter",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "idna_adapter"
 | 
			
		||||
version = "1.2.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "icu_normalizer",
 | 
			
		||||
 "icu_properties",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
@@ -1562,7 +1702,7 @@ dependencies = [
 | 
			
		||||
 "petgraph",
 | 
			
		||||
 "pico-args",
 | 
			
		||||
 "regex",
 | 
			
		||||
 "regex-syntax",
 | 
			
		||||
 "regex-syntax 0.8.5",
 | 
			
		||||
 "string_cache",
 | 
			
		||||
 "term",
 | 
			
		||||
 "tiny-keccak",
 | 
			
		||||
@@ -1576,7 +1716,7 @@ version = "0.20.2"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "507460a910eb7b32ee961886ff48539633b788a36b65692b95f225b844c82553"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "regex-automata",
 | 
			
		||||
 "regex-automata 0.4.8",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
@@ -1634,6 +1774,12 @@ version = "0.4.14"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "litemap"
 | 
			
		||||
version = "0.7.3"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "lock_api"
 | 
			
		||||
version = "0.4.12"
 | 
			
		||||
@@ -1702,6 +1848,15 @@ dependencies = [
 | 
			
		||||
 "xml5ever",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "matchers"
 | 
			
		||||
version = "0.1.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "regex-automata 0.1.10",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "matrixmultiply"
 | 
			
		||||
version = "0.3.9"
 | 
			
		||||
@@ -2516,8 +2671,17 @@ checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "aho-corasick",
 | 
			
		||||
 "memchr",
 | 
			
		||||
 "regex-automata",
 | 
			
		||||
 "regex-syntax",
 | 
			
		||||
 "regex-automata 0.4.8",
 | 
			
		||||
 "regex-syntax 0.8.5",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "regex-automata"
 | 
			
		||||
version = "0.1.10"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "regex-syntax 0.6.29",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
@@ -2528,9 +2692,15 @@ checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "aho-corasick",
 | 
			
		||||
 "memchr",
 | 
			
		||||
 "regex-syntax",
 | 
			
		||||
 "regex-syntax 0.8.5",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "regex-syntax"
 | 
			
		||||
version = "0.6.29"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "regex-syntax"
 | 
			
		||||
version = "0.8.5"
 | 
			
		||||
@@ -3273,6 +3443,7 @@ dependencies = [
 | 
			
		||||
 "tokio",
 | 
			
		||||
 "tracing",
 | 
			
		||||
 "tracing-subscriber",
 | 
			
		||||
 "url",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
@@ -3451,6 +3622,17 @@ dependencies = [
 | 
			
		||||
 "futures-core",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "synstructure"
 | 
			
		||||
version = "0.13.1"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "proc-macro2",
 | 
			
		||||
 "quote",
 | 
			
		||||
 "syn 2.0.85",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "system-configuration"
 | 
			
		||||
version = "0.6.1"
 | 
			
		||||
@@ -3583,6 +3765,16 @@ dependencies = [
 | 
			
		||||
 "crunchy",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "tinystr"
 | 
			
		||||
version = "0.7.6"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "displaydoc",
 | 
			
		||||
 "zerovec",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "tinyvec"
 | 
			
		||||
version = "1.8.0"
 | 
			
		||||
@@ -3750,10 +3942,14 @@ version = "0.3.18"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "matchers",
 | 
			
		||||
 "nu-ansi-term",
 | 
			
		||||
 "once_cell",
 | 
			
		||||
 "regex",
 | 
			
		||||
 "sharded-slab",
 | 
			
		||||
 "smallvec",
 | 
			
		||||
 "thread_local",
 | 
			
		||||
 "tracing",
 | 
			
		||||
 "tracing-core",
 | 
			
		||||
 "tracing-log",
 | 
			
		||||
]
 | 
			
		||||
@@ -3826,12 +4022,6 @@ version = "2.8.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "7e51b68083f157f853b6379db119d1c1be0e6e4dec98101079dec41f6f5cf6df"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "unicode-bidi"
 | 
			
		||||
version = "0.3.17"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "unicode-ident"
 | 
			
		||||
version = "1.0.13"
 | 
			
		||||
@@ -3883,13 +4073,14 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "url"
 | 
			
		||||
version = "2.5.2"
 | 
			
		||||
version = "2.5.3"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c"
 | 
			
		||||
checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "form_urlencoded",
 | 
			
		||||
 "idna",
 | 
			
		||||
 "percent-encoding",
 | 
			
		||||
 "serde",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
@@ -3904,6 +4095,18 @@ version = "0.7.6"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "utf16_iter"
 | 
			
		||||
version = "1.0.5"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "utf8_iter"
 | 
			
		||||
version = "1.0.4"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "uuid"
 | 
			
		||||
version = "1.11.0"
 | 
			
		||||
@@ -4241,6 +4444,18 @@ dependencies = [
 | 
			
		||||
 "memchr",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "write16"
 | 
			
		||||
version = "1.0.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "writeable"
 | 
			
		||||
version = "0.5.5"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "ws_stream_wasm"
 | 
			
		||||
version = "0.7.4"
 | 
			
		||||
@@ -4280,6 +4495,30 @@ dependencies = [
 | 
			
		||||
 "markup5ever 0.14.0",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "yoke"
 | 
			
		||||
version = "0.7.4"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "serde",
 | 
			
		||||
 "stable_deref_trait",
 | 
			
		||||
 "yoke-derive",
 | 
			
		||||
 "zerofrom",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "yoke-derive"
 | 
			
		||||
version = "0.7.4"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "proc-macro2",
 | 
			
		||||
 "quote",
 | 
			
		||||
 "syn 2.0.85",
 | 
			
		||||
 "synstructure",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "zerocopy"
 | 
			
		||||
version = "0.7.35"
 | 
			
		||||
@@ -4301,8 +4540,51 @@ dependencies = [
 | 
			
		||||
 "syn 2.0.85",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "zerofrom"
 | 
			
		||||
version = "0.1.4"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "zerofrom-derive",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "zerofrom-derive"
 | 
			
		||||
version = "0.1.4"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "proc-macro2",
 | 
			
		||||
 "quote",
 | 
			
		||||
 "syn 2.0.85",
 | 
			
		||||
 "synstructure",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "zeroize"
 | 
			
		||||
version = "1.8.1"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "zerovec"
 | 
			
		||||
version = "0.10.4"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "yoke",
 | 
			
		||||
 "zerofrom",
 | 
			
		||||
 "zerovec-derive",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "zerovec-derive"
 | 
			
		||||
version = "0.10.3"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "proc-macro2",
 | 
			
		||||
 "quote",
 | 
			
		||||
 "syn 2.0.85",
 | 
			
		||||
]
 | 
			
		||||
 
 | 
			
		||||
@@ -11,4 +11,5 @@ serde = { version = "1.0.214", features = ["derive"] }
 | 
			
		||||
surrealdb = "2.0.4"
 | 
			
		||||
tokio = { version="1.41.0", features = ["full"] }
 | 
			
		||||
tracing = "0.1.40"
 | 
			
		||||
tracing-subscriber = "0.3.18"
 | 
			
		||||
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
 | 
			
		||||
url = { version = "2.5.3", features = ["serde"] }
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										132
									
								
								src/db.rs
									
									
									
									
									
								
							
							
						
						
									
										132
									
								
								src/db.rs
									
									
									
									
									
								
							@@ -1,22 +1,140 @@
 | 
			
		||||
use serde::{Deserialize, Serialize};
 | 
			
		||||
use surrealdb::{engine::remote::ws::{Client, Ws}, opt::auth::Root, sql::Thing, Surreal};
 | 
			
		||||
use surrealdb::{
 | 
			
		||||
    engine::remote::ws::{Client, Ws},
 | 
			
		||||
    opt::auth::Root,
 | 
			
		||||
    sql::Thing,
 | 
			
		||||
    Surreal,
 | 
			
		||||
};
 | 
			
		||||
use tracing::{debug, error, info, instrument};
 | 
			
		||||
use url::Url;
 | 
			
		||||
 | 
			
		||||
#[derive(Debug, Serialize)]
 | 
			
		||||
#[derive(Debug, Serialize, Deserialize, Clone)]
 | 
			
		||||
pub struct Website {
 | 
			
		||||
    pub site: String,
 | 
			
		||||
    pub href: String,
 | 
			
		||||
    pub crawled: bool
 | 
			
		||||
    /// The url that this data is found at
 | 
			
		||||
    site: Url,
 | 
			
		||||
    /// The url as defined in the <a> tag
 | 
			
		||||
    href: Url,
 | 
			
		||||
    /// Whether or not this link has been crawled yet
 | 
			
		||||
    crawled: bool,
 | 
			
		||||
    /// Whether or not the href was doctored
 | 
			
		||||
    doctored_href: bool,
 | 
			
		||||
    original_href: Option<String>,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl Website {
 | 
			
		||||
    /// Creates a blank site (assumes that url param is site's root)
 | 
			
		||||
    pub fn new(url: &str, href: &str, crawled: bool) -> Self {
 | 
			
		||||
        let mut new = Self::from(url);
 | 
			
		||||
        new.crawled = crawled;
 | 
			
		||||
        new.original_href = Some(href.to_string());
 | 
			
		||||
        new.href =
 | 
			
		||||
            match Url::parse(href) {
 | 
			
		||||
                Ok(e) => e,
 | 
			
		||||
                Err(e) => {
 | 
			
		||||
                    match e {
 | 
			
		||||
                        url::ParseError::RelativeUrlWithoutBase => {
 | 
			
		||||
                            // Try to combine the scheme_host and href to get a useable domain
 | 
			
		||||
                            new.doctored_href = true;
 | 
			
		||||
 | 
			
		||||
                            let url = if !url.ends_with('/') && !href.starts_with('/') {
 | 
			
		||||
                                format!("{url}/{href}")
 | 
			
		||||
                            } else {
 | 
			
		||||
                                format!("{url}{href}")
 | 
			
		||||
                            };
 | 
			
		||||
 | 
			
		||||
                            // paste the domain onto the begining of the href
 | 
			
		||||
                            Url::parse(&url).map_or_else(|err| {
 | 
			
		||||
                                debug!("Parsing {url} with {href}");
 | 
			
		||||
                                error!("{err} Failed to parse href into url on second try. Aborting");
 | 
			
		||||
                                panic!("See error logs for more info.");
 | 
			
		||||
                            }, |ok| ok)
 | 
			
		||||
                        }
 | 
			
		||||
                        _ => {
 | 
			
		||||
                            error!("{e}");
 | 
			
		||||
                            panic!("See error logs for more info.");
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            };
 | 
			
		||||
        new
 | 
			
		||||
    }
 | 
			
		||||
    pub fn crawled(&mut self) {
 | 
			
		||||
        self.crawled = true
 | 
			
		||||
    }
 | 
			
		||||
    pub fn href_str(&self) -> &str {
 | 
			
		||||
        self.href.as_str()
 | 
			
		||||
    }
 | 
			
		||||
    pub fn site(&self) -> String {
 | 
			
		||||
        self.site.to_string()
 | 
			
		||||
    }
 | 
			
		||||
    pub fn domain_str(&self) -> &str {
 | 
			
		||||
        self.site.as_str()
 | 
			
		||||
    }
 | 
			
		||||
    #[instrument(skip_all)]
 | 
			
		||||
    pub async fn store(&mut self, db: &Surreal<Client>) {
 | 
			
		||||
        // is root record?
 | 
			
		||||
        if self.href.path() == "/" {
 | 
			
		||||
            // Upsert is create or update
 | 
			
		||||
            // Whereas Update is just update
 | 
			
		||||
            let record = ("website", &self.href.to_string());
 | 
			
		||||
 | 
			
		||||
            let crawled = if let Some(old) = db.select(record).await.unwrap() {
 | 
			
		||||
                let old: Website = old; // infer type
 | 
			
		||||
                old.crawled
 | 
			
		||||
            } else {false};
 | 
			
		||||
 | 
			
		||||
            if !self.crawled {self.crawled = crawled};
 | 
			
		||||
 | 
			
		||||
            match db.upsert(record).content(self.clone()).await {
 | 
			
		||||
                Ok(e) => {
 | 
			
		||||
                    if let Some(a) = &e {
 | 
			
		||||
                        let _: &Record = a;
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
                Err(e) => {
 | 
			
		||||
                    error!("{}", e);
 | 
			
		||||
                },
 | 
			
		||||
            };
 | 
			
		||||
        } else {
 | 
			
		||||
            let _: Option<Record> = match db.create("website").content(self.clone()).await {
 | 
			
		||||
                Ok(e) => {
 | 
			
		||||
                    if let Some(a) = &e {
 | 
			
		||||
                        let _: &Record = a;
 | 
			
		||||
                    }
 | 
			
		||||
                    e
 | 
			
		||||
                }
 | 
			
		||||
                Err(_) => todo!(),
 | 
			
		||||
            };
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl From<&str> for Website {
 | 
			
		||||
    /// site == href, crawled = false
 | 
			
		||||
    fn from(value: &str) -> Self {
 | 
			
		||||
        let site = match Url::parse(value) {
 | 
			
		||||
            Ok(a) => a,
 | 
			
		||||
            Err(_) => todo!(),
 | 
			
		||||
        };
 | 
			
		||||
        Self {
 | 
			
		||||
            href: site.clone(),
 | 
			
		||||
            crawled: false,
 | 
			
		||||
            site,
 | 
			
		||||
            doctored_href: false,
 | 
			
		||||
            original_href: None,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Debug, Serialize)]
 | 
			
		||||
pub struct Email {
 | 
			
		||||
    pub email: String
 | 
			
		||||
    pub email: String,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Debug, Deserialize)]
 | 
			
		||||
pub struct Record {
 | 
			
		||||
    #[allow(dead_code)]
 | 
			
		||||
    id: Thing,
 | 
			
		||||
    pub id: Thing,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub async fn connect() -> surrealdb::Result<Surreal<Client>> {
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										95
									
								
								src/main.rs
									
									
									
									
									
								
							
							
						
						
									
										95
									
								
								src/main.rs
									
									
									
									
									
								
							@@ -2,33 +2,60 @@ extern crate markup5ever_rcdom as rcdom;
 | 
			
		||||
extern crate html5ever;
 | 
			
		||||
 | 
			
		||||
use std::rc::Rc;
 | 
			
		||||
use db::connect;
 | 
			
		||||
use db::{connect, Website};
 | 
			
		||||
use html5ever::{parse_document, tendril::TendrilSink, tree_builder::TreeBuilderOpts, ParseOpts};
 | 
			
		||||
use rcdom::{Node, RcDom};
 | 
			
		||||
use surrealdb::{engine::remote::ws::Client, Surreal};
 | 
			
		||||
use tracing::{debug, error, info, warn};
 | 
			
		||||
use tracing::{debug, info, instrument};
 | 
			
		||||
use tracing_subscriber::EnvFilter;
 | 
			
		||||
 | 
			
		||||
mod db;
 | 
			
		||||
 | 
			
		||||
#[tokio::main]
 | 
			
		||||
async fn main() {
 | 
			
		||||
    tracing_subscriber::fmt::init();
 | 
			
		||||
    tracing_subscriber::fmt()
 | 
			
		||||
        .with_env_filter(EnvFilter::from_default_env())
 | 
			
		||||
        .with_line_number(true)
 | 
			
		||||
        .without_time()
 | 
			
		||||
        .init();
 | 
			
		||||
    debug!("Starting...");
 | 
			
		||||
 | 
			
		||||
    let url = "https://oliveratkinson.net";
 | 
			
		||||
    // Would probably take these in as parameters from a cli
 | 
			
		||||
    let url = "https://oliveratkinson.net/";
 | 
			
		||||
    let budget = 50; 
 | 
			
		||||
    let mut crawled = 0;
 | 
			
		||||
 | 
			
		||||
    let db = connect().await.expect("Failed to connect to db, aborting.");
 | 
			
		||||
    let dom = get(url).await;
 | 
			
		||||
 | 
			
		||||
    walk(&dom, &db, url).await;
 | 
			
		||||
    // Kick off the whole machine - This Website object doesn't matter, it's just to allow for
 | 
			
		||||
    // get() to work.
 | 
			
		||||
    let mut site = Website::from(url);
 | 
			
		||||
    let dom = get(&mut site, &db).await.expect("Inital page returned None.");
 | 
			
		||||
    crawled += 1;
 | 
			
		||||
    walk(&dom, &db, &site).await;
 | 
			
		||||
 | 
			
		||||
    while crawled < budget {
 | 
			
		||||
        let uncrawled = get_uncrawled_links(&db).await;
 | 
			
		||||
        debug!("Crawling {} pages...", uncrawled.len());
 | 
			
		||||
 | 
			
		||||
        for mut site in uncrawled {
 | 
			
		||||
            if let Some(dom) = get(&mut site, &db).await {
 | 
			
		||||
                walk(&dom, &db, &site).await;
 | 
			
		||||
                crawled += 1;
 | 
			
		||||
                let percent = format!("{:.2}%", (crawled as f32/budget as f32) * 100f32);
 | 
			
		||||
                info!("Crawled {crawled} out of {budget} pages. ({percent})");
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    info!("Done");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
async fn get(url: &str) -> Rc<Node> {
 | 
			
		||||
    let response = reqwest::get(url).await.unwrap();
 | 
			
		||||
#[instrument(skip_all)]
 | 
			
		||||
/// A quick helper function for downloading a url
 | 
			
		||||
async fn get(site: &mut Website, db: &Surreal<Client>) -> Option<Rc<Node>> {
 | 
			
		||||
    if let Ok(response) = reqwest::get(site.href_str()).await {
 | 
			
		||||
        let data = response.text().await.unwrap();
 | 
			
		||||
 | 
			
		||||
        let opts = ParseOpts {
 | 
			
		||||
            tree_builder: TreeBuilderOpts {
 | 
			
		||||
                drop_doctype: true,
 | 
			
		||||
@@ -41,48 +68,31 @@ async fn get(url: &str) -> Rc<Node> {
 | 
			
		||||
            .from_utf8()
 | 
			
		||||
            .read_from(&mut data.as_bytes())
 | 
			
		||||
            .unwrap();
 | 
			
		||||
    dom.document
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
async fn walk(node: &rcdom::Handle, db: &Surreal<Client> , site_name: &str) {
 | 
			
		||||
    // Insert Or Update
 | 
			
		||||
    let _: Option<Vec<db::Record>> = match db.upsert(("website", site_name)).content(db::Website { href: String::from("/"), crawled: true, site: site_name.to_string() } ).await {
 | 
			
		||||
        Ok(e) => {
 | 
			
		||||
            // Return this for type coercion
 | 
			
		||||
            e
 | 
			
		||||
        },
 | 
			
		||||
        Err(e) => {
 | 
			
		||||
            // error!("{}", e);
 | 
			
		||||
        site.crawled();
 | 
			
		||||
        site.store(db).await;
 | 
			
		||||
        return Some(dom.document);
 | 
			
		||||
    }
 | 
			
		||||
    None
 | 
			
		||||
}
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
/// Walks the given site, placing its findings in the database
 | 
			
		||||
async fn walk(node: &rcdom::Handle, db: &Surreal<Client> , site_name: &Website) {
 | 
			
		||||
    // Insert Or Update
 | 
			
		||||
    // create_root(site_name, db).await;
 | 
			
		||||
 | 
			
		||||
    match &node.data {
 | 
			
		||||
        rcdom::NodeData::Element { name, attrs, template_contents, mathml_annotation_xml_integration_point } => {
 | 
			
		||||
            for attr in attrs.borrow().clone() {
 | 
			
		||||
                let name = name.local.to_string();
 | 
			
		||||
                if name == "a" {
 | 
			
		||||
                if name.local.to_string() == "a" {
 | 
			
		||||
                    if attr.value.starts_with("mailto") {
 | 
			
		||||
                        // mailto link, lol
 | 
			
		||||
                        let created: Option<db::Record> = db.create("email").content(db::Email {
 | 
			
		||||
                        let _created: Option<db::Record> = db.create("email").content(db::Email {
 | 
			
		||||
                            email: attr.value.to_string()
 | 
			
		||||
                        }).await.unwrap();
 | 
			
		||||
                        warn!("{:?}", created)
 | 
			
		||||
                    } else {
 | 
			
		||||
                        // FIXME this isn't actually creating records...?
 | 
			
		||||
                        let _: Option<db::Record> = match db.create("website").content(db::Website {
 | 
			
		||||
                            href: attr.value.to_string(),
 | 
			
		||||
                            crawled: false,
 | 
			
		||||
                            site: site_name.to_string()
 | 
			
		||||
                        }).await {
 | 
			
		||||
                            Ok(e) => {
 | 
			
		||||
                                if let Some(a) = &e {
 | 
			
		||||
                                    debug!("{:?}", a);
 | 
			
		||||
                                }
 | 
			
		||||
                                e
 | 
			
		||||
                            },
 | 
			
		||||
                            Err(_) => todo!(),
 | 
			
		||||
                        };
 | 
			
		||||
                        let mut web = Website::new(&site_name.site(), &attr.value, false);
 | 
			
		||||
                        web.store(db).await;
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            };
 | 
			
		||||
@@ -94,3 +104,10 @@ async fn walk(node: &rcdom::Handle, db: &Surreal<Client> , site_name: &str) {
 | 
			
		||||
        Box::pin(walk(child, db, site_name)).await; 
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Returns 0-50 uncrawled links (LIMIT = 50)
 | 
			
		||||
async fn get_uncrawled_links(db: &Surreal<Client>) -> Vec<Website> {
 | 
			
		||||
    let mut response = db.query("SELECT * FROM website WHERE crawled = false LIMIT 50").await.expect("Hard-coded query failed..?");
 | 
			
		||||
    response.take(0).expect("Returned websites couldn't be parsed")
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user