add s3 support
This commit is contained in:
		
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -1,5 +1,6 @@
 | 
				
			|||||||
/target
 | 
					/target
 | 
				
			||||||
/.surrealdb
 | 
					/.surrealdb
 | 
				
			||||||
 | 
					/.minio
 | 
				
			||||||
perf.data
 | 
					perf.data
 | 
				
			||||||
flamegraph.svg
 | 
					flamegraph.svg
 | 
				
			||||||
perf.data.old
 | 
					perf.data.old
 | 
				
			||||||
							
								
								
									
										2
									
								
								.vscode/launch.json
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.vscode/launch.json
									
									
									
									
										vendored
									
									
								
							@@ -9,7 +9,7 @@
 | 
				
			|||||||
            "request": "launch",
 | 
					            "request": "launch",
 | 
				
			||||||
            "name": "Debug executable 'surreal_spider'",
 | 
					            "name": "Debug executable 'surreal_spider'",
 | 
				
			||||||
            "env": {
 | 
					            "env": {
 | 
				
			||||||
                "RUST_LOG": "surreal_spider=debug,reqwest=info",
 | 
					                "RUST_LOG": "surreal_spider=trace,reqwest=info",
 | 
				
			||||||
            },
 | 
					            },
 | 
				
			||||||
            "cargo": {
 | 
					            "cargo": {
 | 
				
			||||||
                "args": [
 | 
					                "args": [
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										254
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										254
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							@@ -1,6 +1,6 @@
 | 
				
			|||||||
# This file is automatically @generated by Cargo.
 | 
					# This file is automatically @generated by Cargo.
 | 
				
			||||||
# It is not intended for manual editing.
 | 
					# It is not intended for manual editing.
 | 
				
			||||||
version = 3
 | 
					version = 4
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "Inflector"
 | 
					name = "Inflector"
 | 
				
			||||||
@@ -103,6 +103,55 @@ dependencies = [
 | 
				
			|||||||
 "libc",
 | 
					 "libc",
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "anstream"
 | 
				
			||||||
 | 
					version = "0.6.18"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "anstyle",
 | 
				
			||||||
 | 
					 "anstyle-parse",
 | 
				
			||||||
 | 
					 "anstyle-query",
 | 
				
			||||||
 | 
					 "anstyle-wincon",
 | 
				
			||||||
 | 
					 "colorchoice",
 | 
				
			||||||
 | 
					 "is_terminal_polyfill",
 | 
				
			||||||
 | 
					 "utf8parse",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "anstyle"
 | 
				
			||||||
 | 
					version = "1.0.10"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "anstyle-parse"
 | 
				
			||||||
 | 
					version = "0.2.6"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "utf8parse",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "anstyle-query"
 | 
				
			||||||
 | 
					version = "1.1.2"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "windows-sys 0.59.0",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "anstyle-wincon"
 | 
				
			||||||
 | 
					version = "3.0.6"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "anstyle",
 | 
				
			||||||
 | 
					 "windows-sys 0.59.0",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "any_ascii"
 | 
					name = "any_ascii"
 | 
				
			||||||
version = "0.3.2"
 | 
					version = "0.3.2"
 | 
				
			||||||
@@ -262,6 +311,17 @@ dependencies = [
 | 
				
			|||||||
 "serde_json",
 | 
					 "serde_json",
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "async-recursion"
 | 
				
			||||||
 | 
					version = "1.1.1"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "proc-macro2",
 | 
				
			||||||
 | 
					 "quote",
 | 
				
			||||||
 | 
					 "syn 2.0.85",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "async-stream"
 | 
					name = "async-stream"
 | 
				
			||||||
version = "0.3.6"
 | 
					version = "0.3.6"
 | 
				
			||||||
@@ -665,6 +725,12 @@ dependencies = [
 | 
				
			|||||||
 "inout",
 | 
					 "inout",
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "colorchoice"
 | 
				
			||||||
 | 
					version = "1.0.3"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "concurrent-queue"
 | 
					name = "concurrent-queue"
 | 
				
			||||||
version = "2.5.0"
 | 
					version = "2.5.0"
 | 
				
			||||||
@@ -705,6 +771,21 @@ dependencies = [
 | 
				
			|||||||
 "libc",
 | 
					 "libc",
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "crc"
 | 
				
			||||||
 | 
					version = "3.2.1"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "crc-catalog",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "crc-catalog"
 | 
				
			||||||
 | 
					version = "2.4.0"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "crossbeam-utils"
 | 
					name = "crossbeam-utils"
 | 
				
			||||||
version = "0.8.20"
 | 
					version = "0.8.20"
 | 
				
			||||||
@@ -775,6 +856,20 @@ dependencies = [
 | 
				
			|||||||
 "parking_lot_core",
 | 
					 "parking_lot_core",
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "dashmap"
 | 
				
			||||||
 | 
					version = "6.1.0"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "cfg-if",
 | 
				
			||||||
 | 
					 "crossbeam-utils",
 | 
				
			||||||
 | 
					 "hashbrown 0.14.5",
 | 
				
			||||||
 | 
					 "lock_api",
 | 
				
			||||||
 | 
					 "once_cell",
 | 
				
			||||||
 | 
					 "parking_lot_core",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "data-encoding"
 | 
					name = "data-encoding"
 | 
				
			||||||
version = "2.6.0"
 | 
					version = "2.6.0"
 | 
				
			||||||
@@ -791,6 +886,17 @@ dependencies = [
 | 
				
			|||||||
 "serde",
 | 
					 "serde",
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "derivative"
 | 
				
			||||||
 | 
					version = "2.2.0"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "proc-macro2",
 | 
				
			||||||
 | 
					 "quote",
 | 
				
			||||||
 | 
					 "syn 1.0.109",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "deunicode"
 | 
					name = "deunicode"
 | 
				
			||||||
version = "1.6.0"
 | 
					version = "1.6.0"
 | 
				
			||||||
@@ -895,6 +1001,29 @@ version = "0.1.2"
 | 
				
			|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d"
 | 
					checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "env_filter"
 | 
				
			||||||
 | 
					version = "0.1.2"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "log",
 | 
				
			||||||
 | 
					 "regex",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "env_logger"
 | 
				
			||||||
 | 
					version = "0.11.5"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "e13fa619b91fb2381732789fc5de83b45675e882f66623b7d8cb4f643017018d"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "anstream",
 | 
				
			||||||
 | 
					 "anstyle",
 | 
				
			||||||
 | 
					 "env_filter",
 | 
				
			||||||
 | 
					 "humantime",
 | 
				
			||||||
 | 
					 "log",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "equivalent"
 | 
					name = "equivalent"
 | 
				
			||||||
version = "1.0.1"
 | 
					version = "1.0.1"
 | 
				
			||||||
@@ -1279,6 +1408,15 @@ dependencies = [
 | 
				
			|||||||
 "digest",
 | 
					 "digest",
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "home"
 | 
				
			||||||
 | 
					version = "0.5.9"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "windows-sys 0.52.0",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "html5ever"
 | 
					name = "html5ever"
 | 
				
			||||||
version = "0.27.0"
 | 
					version = "0.27.0"
 | 
				
			||||||
@@ -1347,6 +1485,12 @@ version = "1.9.5"
 | 
				
			|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946"
 | 
					checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "httpdate"
 | 
				
			||||||
 | 
					version = "1.0.3"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "humantime"
 | 
					name = "humantime"
 | 
				
			||||||
version = "2.1.0"
 | 
					version = "2.1.0"
 | 
				
			||||||
@@ -1366,6 +1510,7 @@ dependencies = [
 | 
				
			|||||||
 "http",
 | 
					 "http",
 | 
				
			||||||
 "http-body",
 | 
					 "http-body",
 | 
				
			||||||
 "httparse",
 | 
					 "httparse",
 | 
				
			||||||
 | 
					 "httpdate",
 | 
				
			||||||
 "itoa",
 | 
					 "itoa",
 | 
				
			||||||
 "pin-project-lite",
 | 
					 "pin-project-lite",
 | 
				
			||||||
 "smallvec",
 | 
					 "smallvec",
 | 
				
			||||||
@@ -1631,6 +1776,12 @@ version = "2.10.1"
 | 
				
			|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708"
 | 
					checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "is_terminal_polyfill"
 | 
				
			||||||
 | 
					version = "1.70.1"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "itertools"
 | 
					name = "itertools"
 | 
				
			||||||
version = "0.10.5"
 | 
					version = "0.10.5"
 | 
				
			||||||
@@ -1877,6 +2028,12 @@ dependencies = [
 | 
				
			|||||||
 "digest",
 | 
					 "digest",
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "md5"
 | 
				
			||||||
 | 
					version = "0.7.0"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "memchr"
 | 
					name = "memchr"
 | 
				
			||||||
version = "2.7.4"
 | 
					version = "2.7.4"
 | 
				
			||||||
@@ -1922,6 +2079,46 @@ dependencies = [
 | 
				
			|||||||
 "unicase",
 | 
					 "unicase",
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "minio"
 | 
				
			||||||
 | 
					version = "0.2.0-alpha"
 | 
				
			||||||
 | 
					source = "git+https://github.com/minio/minio-rs.git?rev=c28f576#c28f576cb8f8cf47fb941bb9db62b2cbd6f080c1"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "async-recursion",
 | 
				
			||||||
 | 
					 "async-trait",
 | 
				
			||||||
 | 
					 "base64 0.22.1",
 | 
				
			||||||
 | 
					 "byteorder",
 | 
				
			||||||
 | 
					 "bytes",
 | 
				
			||||||
 | 
					 "chrono",
 | 
				
			||||||
 | 
					 "crc",
 | 
				
			||||||
 | 
					 "dashmap 6.1.0",
 | 
				
			||||||
 | 
					 "derivative",
 | 
				
			||||||
 | 
					 "env_logger",
 | 
				
			||||||
 | 
					 "futures-util",
 | 
				
			||||||
 | 
					 "hex",
 | 
				
			||||||
 | 
					 "hmac",
 | 
				
			||||||
 | 
					 "home",
 | 
				
			||||||
 | 
					 "http",
 | 
				
			||||||
 | 
					 "hyper",
 | 
				
			||||||
 | 
					 "lazy_static",
 | 
				
			||||||
 | 
					 "log",
 | 
				
			||||||
 | 
					 "md5",
 | 
				
			||||||
 | 
					 "multimap",
 | 
				
			||||||
 | 
					 "os_info",
 | 
				
			||||||
 | 
					 "percent-encoding",
 | 
				
			||||||
 | 
					 "rand",
 | 
				
			||||||
 | 
					 "regex",
 | 
				
			||||||
 | 
					 "reqwest",
 | 
				
			||||||
 | 
					 "serde",
 | 
				
			||||||
 | 
					 "serde_json",
 | 
				
			||||||
 | 
					 "sha2",
 | 
				
			||||||
 | 
					 "tokio",
 | 
				
			||||||
 | 
					 "tokio-stream",
 | 
				
			||||||
 | 
					 "tokio-util",
 | 
				
			||||||
 | 
					 "urlencoding",
 | 
				
			||||||
 | 
					 "xmltree",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "miniz_oxide"
 | 
					name = "miniz_oxide"
 | 
				
			||||||
version = "0.8.0"
 | 
					version = "0.8.0"
 | 
				
			||||||
@@ -1960,6 +2157,15 @@ dependencies = [
 | 
				
			|||||||
 "version_check",
 | 
					 "version_check",
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "multimap"
 | 
				
			||||||
 | 
					version = "0.10.0"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "serde",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "nanoid"
 | 
					name = "nanoid"
 | 
				
			||||||
version = "0.4.0"
 | 
					version = "0.4.0"
 | 
				
			||||||
@@ -2183,6 +2389,17 @@ dependencies = [
 | 
				
			|||||||
 "vcpkg",
 | 
					 "vcpkg",
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "os_info"
 | 
				
			||||||
 | 
					version = "3.8.2"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "ae99c7fa6dd38c7cafe1ec085e804f8f555a2f8659b0dbe03f1f9963a9b51092"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "log",
 | 
				
			||||||
 | 
					 "serde",
 | 
				
			||||||
 | 
					 "windows-sys 0.52.0",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "overload"
 | 
					name = "overload"
 | 
				
			||||||
version = "0.1.1"
 | 
					version = "0.1.1"
 | 
				
			||||||
@@ -3437,6 +3654,7 @@ version = "0.1.0"
 | 
				
			|||||||
dependencies = [
 | 
					dependencies = [
 | 
				
			||||||
 "html5ever 0.29.0",
 | 
					 "html5ever 0.29.0",
 | 
				
			||||||
 "markup5ever_rcdom",
 | 
					 "markup5ever_rcdom",
 | 
				
			||||||
 | 
					 "minio",
 | 
				
			||||||
 "reqwest",
 | 
					 "reqwest",
 | 
				
			||||||
 "serde",
 | 
					 "serde",
 | 
				
			||||||
 "surrealdb",
 | 
					 "surrealdb",
 | 
				
			||||||
@@ -3510,7 +3728,7 @@ dependencies = [
 | 
				
			|||||||
 "cedar-policy",
 | 
					 "cedar-policy",
 | 
				
			||||||
 "chrono",
 | 
					 "chrono",
 | 
				
			||||||
 "ciborium",
 | 
					 "ciborium",
 | 
				
			||||||
 "dashmap",
 | 
					 "dashmap 5.5.3",
 | 
				
			||||||
 "deunicode",
 | 
					 "deunicode",
 | 
				
			||||||
 "dmp",
 | 
					 "dmp",
 | 
				
			||||||
 "fst",
 | 
					 "fst",
 | 
				
			||||||
@@ -3840,6 +4058,17 @@ dependencies = [
 | 
				
			|||||||
 "tokio",
 | 
					 "tokio",
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "tokio-stream"
 | 
				
			||||||
 | 
					version = "0.1.16"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "4f4e6ce100d0eb49a2734f8c0812bcd324cf357d21810932c5df6b96ef2b86f1"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "futures-core",
 | 
				
			||||||
 | 
					 "pin-project-lite",
 | 
				
			||||||
 | 
					 "tokio",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "tokio-tungstenite"
 | 
					name = "tokio-tungstenite"
 | 
				
			||||||
version = "0.23.1"
 | 
					version = "0.23.1"
 | 
				
			||||||
@@ -4107,6 +4336,12 @@ version = "1.0.4"
 | 
				
			|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
 | 
					checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "utf8parse"
 | 
				
			||||||
 | 
					version = "0.2.2"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "uuid"
 | 
					name = "uuid"
 | 
				
			||||||
version = "1.11.0"
 | 
					version = "1.11.0"
 | 
				
			||||||
@@ -4484,6 +4719,12 @@ dependencies = [
 | 
				
			|||||||
 "tap",
 | 
					 "tap",
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "xml-rs"
 | 
				
			||||||
 | 
					version = "0.8.23"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "af310deaae937e48a26602b730250b4949e125f468f11e6990be3e5304ddd96f"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "xml5ever"
 | 
					name = "xml5ever"
 | 
				
			||||||
version = "0.20.0"
 | 
					version = "0.20.0"
 | 
				
			||||||
@@ -4495,6 +4736,15 @@ dependencies = [
 | 
				
			|||||||
 "markup5ever 0.14.0",
 | 
					 "markup5ever 0.14.0",
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[package]]
 | 
				
			||||||
 | 
					name = "xmltree"
 | 
				
			||||||
 | 
					version = "0.11.0"
 | 
				
			||||||
 | 
					source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
				
			||||||
 | 
					checksum = "b619f8c85654798007fb10afa5125590b43b088c225a25fc2fec100a9fad0fc6"
 | 
				
			||||||
 | 
					dependencies = [
 | 
				
			||||||
 | 
					 "xml-rs",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[package]]
 | 
					[[package]]
 | 
				
			||||||
name = "yoke"
 | 
					name = "yoke"
 | 
				
			||||||
version = "0.7.4"
 | 
					version = "0.7.4"
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -6,6 +6,8 @@ edition = "2021"
 | 
				
			|||||||
[dependencies]
 | 
					[dependencies]
 | 
				
			||||||
html5ever = "0.29.0"
 | 
					html5ever = "0.29.0"
 | 
				
			||||||
markup5ever_rcdom = "0.5.0-unofficial"
 | 
					markup5ever_rcdom = "0.5.0-unofficial"
 | 
				
			||||||
 | 
					# minio = "0.1.0"
 | 
				
			||||||
 | 
					minio = {git="https://github.com/minio/minio-rs.git", rev = "c28f576"}
 | 
				
			||||||
reqwest = "0.12.9"
 | 
					reqwest = "0.12.9"
 | 
				
			||||||
serde = { version = "1.0.214", features = ["derive"] }
 | 
					serde = { version = "1.0.214", features = ["derive"] }
 | 
				
			||||||
surrealdb = "2.0.4"
 | 
					surrealdb = "2.0.4"
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										17
									
								
								compose.yml
									
									
									
									
									
								
							
							
						
						
									
										17
									
								
								compose.yml
									
									
									
									
									
								
							@@ -1,5 +1,5 @@
 | 
				
			|||||||
services:
 | 
					services:
 | 
				
			||||||
  db:
 | 
					  surreal:
 | 
				
			||||||
    image: surrealdb/surrealdb:latest-dev
 | 
					    image: surrealdb/surrealdb:latest-dev
 | 
				
			||||||
    ports:
 | 
					    ports:
 | 
				
			||||||
    - 8000:8000
 | 
					    - 8000:8000
 | 
				
			||||||
@@ -14,3 +14,18 @@ services:
 | 
				
			|||||||
      - --pass
 | 
					      - --pass
 | 
				
			||||||
      - root
 | 
					      - root
 | 
				
			||||||
      - rocksdb:/mydata/database.db
 | 
					      - rocksdb:/mydata/database.db
 | 
				
			||||||
 | 
					  minio:
 | 
				
			||||||
 | 
					    image: quay.io/minio/minio
 | 
				
			||||||
 | 
					    ports:
 | 
				
			||||||
 | 
					      - 9000:9000
 | 
				
			||||||
 | 
					      - 9001:9001
 | 
				
			||||||
 | 
					    environment:
 | 
				
			||||||
 | 
					      - MINIO_ROOT_USER=root
 | 
				
			||||||
 | 
					      - MINIO_ROOT_PASSWORD=an8charpassword
 | 
				
			||||||
 | 
					    volumes:
 | 
				
			||||||
 | 
					      - ./.minio/:/data
 | 
				
			||||||
 | 
					    command:
 | 
				
			||||||
 | 
					      - server
 | 
				
			||||||
 | 
					      - /data
 | 
				
			||||||
 | 
					      - --console-address
 | 
				
			||||||
 | 
					      - ":9001"
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,2 +0,0 @@
 | 
				
			|||||||
DEFINE TABLE website SCHEMALESS;
 | 
					 | 
				
			||||||
    DEFINE FIELD accessed_at ON TABLE website VALUE time::now();
 | 
					 | 
				
			||||||
							
								
								
									
										29
									
								
								src/db.rs
									
									
									
									
									
								
							
							
						
						
									
										29
									
								
								src/db.rs
									
									
									
									
									
								
							@@ -8,12 +8,12 @@ use surrealdb::{
 | 
				
			|||||||
use tracing::{error, instrument, trace, warn};
 | 
					use tracing::{error, instrument, trace, warn};
 | 
				
			||||||
use url::Url;
 | 
					use url::Url;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use crate::Timer;
 | 
					use crate::{Config, Timer};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[derive(Debug, Serialize, Deserialize, Clone)]
 | 
					#[derive(Debug, Serialize, Deserialize, Clone)]
 | 
				
			||||||
pub struct Website {
 | 
					pub struct Website {
 | 
				
			||||||
    /// The url that this data is found at
 | 
					    /// The url that this data is found at
 | 
				
			||||||
    site: Url,
 | 
					    pub site: Url,
 | 
				
			||||||
    /// Wether or not this link has been crawled yet
 | 
					    /// Wether or not this link has been crawled yet
 | 
				
			||||||
    pub crawled: bool,
 | 
					    pub crawled: bool,
 | 
				
			||||||
    #[serde(skip_serializing)]
 | 
					    #[serde(skip_serializing)]
 | 
				
			||||||
@@ -39,10 +39,6 @@ impl Website {
 | 
				
			|||||||
        self.crawled = true
 | 
					        self.crawled = true
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    pub fn mut_url(&mut self) -> &mut Url {
 | 
					 | 
				
			||||||
        &mut self.site
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    #[instrument(skip_all)]
 | 
					    #[instrument(skip_all)]
 | 
				
			||||||
    pub async fn links_to(&self, other: Vec<Thing>, db: &Surreal<Client>) {
 | 
					    pub async fn links_to(&self, other: Vec<Thing>, db: &Surreal<Client>) {
 | 
				
			||||||
        let len = other.len();
 | 
					        let len = other.len();
 | 
				
			||||||
@@ -149,19 +145,30 @@ pub struct Record {
 | 
				
			|||||||
    pub id: Thing,
 | 
					    pub id: Thing,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub async fn connect() -> surrealdb::Result<Surreal<Client>> {
 | 
					#[instrument(skip_all, name = "SurrealDB")]
 | 
				
			||||||
 | 
					pub async fn connect(config: &Config<'_>) -> surrealdb::Result<Surreal<Client>> {
 | 
				
			||||||
 | 
					    trace!("Establishing connection to surreal...");
 | 
				
			||||||
    // Connect to the server
 | 
					    // Connect to the server
 | 
				
			||||||
    let db = Surreal::new::<Ws>("127.0.0.1:8000").await?;
 | 
					    let db = Surreal::new::<Ws>(config.surreal_url).await?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    trace!("Logging in...");
 | 
				
			||||||
    // Signin as a namespace, database, or root user
 | 
					    // Signin as a namespace, database, or root user
 | 
				
			||||||
    db.signin(Root {
 | 
					    db.signin(Root {
 | 
				
			||||||
        username: "root",
 | 
					        username: config.surreal_username,
 | 
				
			||||||
        password: "root",
 | 
					        password: config.surreal_password,
 | 
				
			||||||
    })
 | 
					    })
 | 
				
			||||||
    .await?;
 | 
					    .await?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Select a specific namespace / database
 | 
					    // Select a specific namespace / database
 | 
				
			||||||
    db.use_ns("test").use_db("v1.2").await?;
 | 
					    db
 | 
				
			||||||
 | 
					        .use_ns(config.surreal_ns)
 | 
				
			||||||
 | 
					        .use_db(config.surreal_db)
 | 
				
			||||||
 | 
					        .await?;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    let setup = include_bytes!("setup.surql");
 | 
				
			||||||
 | 
					    let file = setup.iter().map(|c| *c as char).collect::<String>();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    db.query(file).await.expect("Failed to setup surreal tables.");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Ok(db)
 | 
					    Ok(db)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										56
									
								
								src/main.rs
									
									
									
									
									
								
							
							
						
						
									
										56
									
								
								src/main.rs
									
									
									
									
									
								
							@@ -6,12 +6,27 @@ use html5ever::{
 | 
				
			|||||||
    local_name, parse_document, tendril::TendrilSink, tree_builder::TreeBuilderOpts, ParseOpts,
 | 
					    local_name, parse_document, tendril::TendrilSink, tree_builder::TreeBuilderOpts, ParseOpts,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
use rcdom::RcDom;
 | 
					use rcdom::RcDom;
 | 
				
			||||||
 | 
					use s3::S3;
 | 
				
			||||||
use std::time::Instant;
 | 
					use std::time::Instant;
 | 
				
			||||||
use surrealdb::{engine::remote::ws::Client, sql::Thing, Surreal};
 | 
					use surrealdb::{engine::remote::ws::Client, sql::Thing, Surreal};
 | 
				
			||||||
use tracing::{debug, info, instrument, trace, trace_span};
 | 
					use tracing::{debug, info, instrument, trace, trace_span};
 | 
				
			||||||
use tracing_subscriber::EnvFilter;
 | 
					use tracing_subscriber::EnvFilter;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
mod db;
 | 
					mod db;
 | 
				
			||||||
 | 
					mod s3;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Connection settings for the two backing services used by the crawler:
					/// SurrealDB and the S3-compatible object store.
					///
					/// Every field borrows its string for the lifetime `'a`.
					struct Config<'a> {
					    /// SurrealDB namespace selected with `use_ns` after sign-in.
					    surreal_ns: &'a str,
					    /// SurrealDB database selected with `use_db` after sign-in.
					    surreal_db: &'a str,
					    /// Address of the SurrealDB server.
					    // NOTE(review): presumably `host:port` without a scheme; confirm against the db module.
					    surreal_url: &'a str,
					    /// Username used for the SurrealDB root sign-in.
					    surreal_username: &'a str,
					    /// Password used for the SurrealDB root sign-in.
					    surreal_password: &'a str,

					    /// Base URL of the S3 endpoint, parsed into a `BaseUrl` when connecting.
					    s3_url: &'a str,
					    /// Bucket that is checked for (and created if missing) when connecting.
					    s3_bucket: &'a str,
					    /// Access key handed to the static credentials provider.
					    s3_access_key: &'a str,
					    /// Secret key handed to the static credentials provider.
					    s3_secret_key: &'a str,
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[tokio::main]
 | 
					#[tokio::main]
 | 
				
			||||||
async fn main() {
 | 
					async fn main() {
 | 
				
			||||||
@@ -21,16 +36,28 @@ async fn main() {
 | 
				
			|||||||
        .without_time()
 | 
					        .without_time()
 | 
				
			||||||
        .init();
 | 
					        .init();
 | 
				
			||||||
    debug!("Starting...");
 | 
					    debug!("Starting...");
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    let config = Config {
 | 
				
			||||||
 | 
					        surreal_ns: "test",
 | 
				
			||||||
 | 
					        surreal_db: "v1.5",
 | 
				
			||||||
 | 
					        surreal_url: "localhost:8000",
 | 
				
			||||||
 | 
					        surreal_username: "root",
 | 
				
			||||||
 | 
					        surreal_password: "root",
 | 
				
			||||||
 | 
					        s3_url: "http://localhost:9000",
 | 
				
			||||||
 | 
					        s3_bucket: "v1.5",
 | 
				
			||||||
 | 
					        s3_access_key: "8tUJn7e1paMFZQr0PKIT",
 | 
				
			||||||
 | 
					        s3_secret_key: "uSMvYxNOeCejCUgXVqgTfYlUEcmiZY0xcZ91M9E0",
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Would probably take these in as parameters from a cli
 | 
					    // Would probably take these in as parameters from a cli
 | 
				
			||||||
    let url = "https://oliveratkinson.net/";
 | 
					    let starting_url = "https://oliveratkinson.net/";
 | 
				
			||||||
    // let url = "http://localhost:5500";
 | 
					    let budget = 200;
 | 
				
			||||||
    let budget = 1000;
 | 
					 | 
				
			||||||
    let mut crawled = 0;
 | 
					    let mut crawled = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let db = connect().await.expect("Failed to connect to db, aborting.");
 | 
					    let s3 = S3::connect(&config).await.expect("Failed to connect to minio, aborting.");
 | 
				
			||||||
 | 
					    let db = connect(&config).await.expect("Failed to connect to surreal, aborting.");
 | 
				
			||||||
    let client = reqwest::Client::builder()
 | 
					    
 | 
				
			||||||
 | 
					    let reqwest = reqwest::Client::builder()
 | 
				
			||||||
        // .use_rustls_tls()
 | 
					        // .use_rustls_tls()
 | 
				
			||||||
        .build()
 | 
					        .build()
 | 
				
			||||||
        .unwrap();
 | 
					        .unwrap();
 | 
				
			||||||
@@ -40,8 +67,8 @@ async fn main() {
 | 
				
			|||||||
    let span = trace_span!("Pre-Loop");
 | 
					    let span = trace_span!("Pre-Loop");
 | 
				
			||||||
    let pre_loop_span = span.enter();
 | 
					    let pre_loop_span = span.enter();
 | 
				
			||||||
    // Download the site
 | 
					    // Download the site
 | 
				
			||||||
    let mut site = Website::new(&url, false);
 | 
					    let mut site = Website::new(&starting_url, false);
 | 
				
			||||||
    get(&mut site, &db, &client, &mut crawled).await;
 | 
					    get(&mut site, &db, &reqwest, &s3, &mut crawled).await;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    drop(pre_loop_span);
 | 
					    drop(pre_loop_span);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -65,7 +92,7 @@ async fn main() {
 | 
				
			|||||||
        let _ = span.enter();
 | 
					        let _ = span.enter();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for mut site in uncrawled {
 | 
					        for mut site in uncrawled {
 | 
				
			||||||
            get(&mut site, &db, &client, &mut crawled).await;
 | 
					            get(&mut site, &db, &reqwest, &s3, &mut crawled).await;
 | 
				
			||||||
            let percent = format!("{:.2}%", (crawled as f32 / budget as f32) * 100f32);
 | 
					            let percent = format!("{:.2}%", (crawled as f32 / budget as f32) * 100f32);
 | 
				
			||||||
            info!("Crawled {crawled} out of {budget} pages. ({percent})");
 | 
					            info!("Crawled {crawled} out of {budget} pages. ({percent})");
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
@@ -80,13 +107,14 @@ async fn main() {
 | 
				
			|||||||
async fn get(
 | 
					async fn get(
 | 
				
			||||||
    site: &mut Website,
 | 
					    site: &mut Website,
 | 
				
			||||||
    db: &Surreal<Client>,
 | 
					    db: &Surreal<Client>,
 | 
				
			||||||
    request_client: &reqwest::Client,
 | 
					    reqwest: &reqwest::Client,
 | 
				
			||||||
 | 
					    s3: &S3,
 | 
				
			||||||
    count: &mut usize,
 | 
					    count: &mut usize,
 | 
				
			||||||
) {
 | 
					) {
 | 
				
			||||||
    trace!("Get: {}", site.to_string());
 | 
					    trace!("Get: {}", site.to_string());
 | 
				
			||||||
    let timer = Timer::start("Got page");
 | 
					    let timer = Timer::start("Got page");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if let Ok(response) = request_client.get(site.to_string()).send().await {
 | 
					    if let Ok(response) = reqwest.get(site.to_string()).send().await {
 | 
				
			||||||
        timer.stop();
 | 
					        timer.stop();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // Get body
 | 
					        // Get body
 | 
				
			||||||
@@ -98,6 +126,8 @@ async fn get(
 | 
				
			|||||||
            },
 | 
					            },
 | 
				
			||||||
            ..Default::default()
 | 
					            ..Default::default()
 | 
				
			||||||
        };
 | 
					        };
 | 
				
			||||||
 | 
					        s3.store(&data, &site.site).await; 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // Get DOM
 | 
					        // Get DOM
 | 
				
			||||||
        let dom = parse_document(RcDom::default(), opts)
 | 
					        let dom = parse_document(RcDom::default(), opts)
 | 
				
			||||||
            .from_utf8()
 | 
					            .from_utf8()
 | 
				
			||||||
@@ -156,10 +186,10 @@ async fn walk(
 | 
				
			|||||||
                            let mut web = site.clone();
 | 
					                            let mut web = site.clone();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                            // Set url
 | 
					                            // Set url
 | 
				
			||||||
                            let url = web.mut_url();
 | 
					                            let mut url = web.site;
 | 
				
			||||||
                            url.set_fragment(None); // removes #xyz
 | 
					                            url.set_fragment(None); // removes #xyz
 | 
				
			||||||
                            let joined = url.join(&attr.value).unwrap();
 | 
					                            let joined = url.join(&attr.value).unwrap();
 | 
				
			||||||
                            *url = joined;
 | 
					                            web.site = joined;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                            // Set other attributes
 | 
					                            // Set other attributes
 | 
				
			||||||
                            web.crawled = false;
 | 
					                            web.crawled = false;
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										63
									
								
								src/s3.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								src/s3.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,63 @@
 | 
				
			|||||||
 | 
					use minio::s3::{
 | 
				
			||||||
 | 
					    args::{BucketExistsArgs, MakeBucketArgs},
 | 
				
			||||||
 | 
					    client::ClientBuilder,
 | 
				
			||||||
 | 
					    creds::StaticProvider,
 | 
				
			||||||
 | 
					    error::Error,
 | 
				
			||||||
 | 
					    http::BaseUrl,
 | 
				
			||||||
 | 
					    Client,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					use tracing::{instrument, trace};
 | 
				
			||||||
 | 
					use url::Url;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use crate::Config;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub struct S3 {
 | 
				
			||||||
 | 
					    bucket_name: String,
 | 
				
			||||||
 | 
					    client: Client,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl S3 {
 | 
				
			||||||
 | 
					    #[instrument(skip_all, name = "S3")]
 | 
				
			||||||
 | 
					    pub async fn connect(config: &Config<'_>) -> Result<Self, Error> {
 | 
				
			||||||
 | 
					        let base_url = config.s3_url.parse::<BaseUrl>().unwrap();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let static_provider =
 | 
				
			||||||
 | 
					            StaticProvider::new(&config.s3_access_key, &config.s3_secret_key, None);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let client = ClientBuilder::new(base_url)
 | 
				
			||||||
 | 
					            .provider(Some(Box::new(static_provider)))
 | 
				
			||||||
 | 
					            .build()?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        trace!("Checking bucket...");
 | 
				
			||||||
 | 
					        let exists = client
 | 
				
			||||||
 | 
					            .bucket_exists(&BucketExistsArgs::new(&config.s3_bucket).unwrap())
 | 
				
			||||||
 | 
					            .await?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if !exists {
 | 
				
			||||||
 | 
					            trace!("Creating bucket...");
 | 
				
			||||||
 | 
					            client
 | 
				
			||||||
 | 
					                .make_bucket(&MakeBucketArgs::new(&config.s3_bucket).unwrap())
 | 
				
			||||||
 | 
					                .await?;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        trace!("Connection successfull");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        Ok(Self {
 | 
				
			||||||
 | 
					            bucket_name: config.s3_bucket.to_owned(),
 | 
				
			||||||
 | 
					            client: client,
 | 
				
			||||||
 | 
					        })
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    pub async fn store(&self, data: &str, name: &Url) {
 | 
				
			||||||
 | 
					        if let Some(domain) = name.domain() {
 | 
				
			||||||
 | 
					            let filename = domain.to_string() + name.path();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            let _ = &self
 | 
				
			||||||
 | 
					                .client
 | 
				
			||||||
 | 
					                .put_object_content(&self.bucket_name, &filename, data.to_owned())
 | 
				
			||||||
 | 
					                .send()
 | 
				
			||||||
 | 
					                .await
 | 
				
			||||||
 | 
					                .unwrap();
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										2
									
								
								src/setup.surql
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								src/setup.surql
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,2 @@
 | 
				
			|||||||
 | 
					-- Table holding the crawled website records; schemaless, and only defined when it does not exist yet.
					DEFINE TABLE IF NOT EXISTS website SCHEMALESS;
 | 
				
			||||||
 | 
					-- Field on `website` whose value is computed by the database as time::now().
					-- NOTE(review): presumably re-evaluated on every write to the record; confirm against the SurrealDB DEFINE FIELD docs.
					DEFINE FIELD IF NOT EXISTS accessed_at ON TABLE website VALUE time::now();
 | 
				
			||||||
		Reference in New Issue
	
	Block a user