foss_storage #3
@ -3,7 +3,7 @@ surreal_url = "localhost:8000"
|
||||
surreal_username = "root"
|
||||
surreal_password = "root"
|
||||
surreal_ns = "test"
|
||||
surreal_db = "v1.18.1"
|
||||
surreal_db = "v1.19.1"
|
||||
|
||||
# Crawler config
|
||||
crawl_filter = "en.wikipedia.com"
|
||||
|
@ -1,7 +1,7 @@
|
||||
use std::path::PathBuf;
|
||||
use std::{ffi::OsStr, path::PathBuf};
|
||||
|
||||
use tokio::fs;
|
||||
use tracing::{error, instrument, trace};
|
||||
use tracing::{error, instrument, trace, warn};
|
||||
use url::Url;
|
||||
|
||||
#[instrument(skip(data))]
|
||||
@ -10,7 +10,7 @@ pub async fn store(data: &str, url: &Url) {
|
||||
let url_path = PathBuf::from("./downloaded/".to_string() + url.domain().unwrap_or("UnknownDomain") + url.path());
|
||||
|
||||
// if it's a file
|
||||
let (basepath, filename) = if url_path.extension().is_some() {
|
||||
let (basepath, filename) = if url_path.extension().filter(valid_file_extension).is_some() {
|
||||
// get everything up till the file
|
||||
let basepath = url_path.ancestors().skip(1).take(1).collect::<PathBuf>();
|
||||
// get the file name
|
||||
@ -33,3 +33,36 @@ pub async fn store(data: &str, url: &Url) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn valid_file_extension(take: &&OsStr) -> bool {
|
||||
let los = take.to_string_lossy();
|
||||
let all = los.split('.');
|
||||
match all.last() {
|
||||
Some(s) => {
|
||||
match s.to_lowercase().as_str() {
|
||||
"html" => true,
|
||||
"css" => true,
|
||||
"js" => true,
|
||||
"ts" => true,
|
||||
"otf" => true, // font
|
||||
|
||||
"png" => true,
|
||||
"svg" => true,
|
||||
"jpg" => true,
|
||||
"jpeg" => true,
|
||||
"mp4" => true,
|
||||
"mp3" => true,
|
||||
"webp" => true,
|
||||
|
||||
"pdf" => true,
|
||||
"json" => true,
|
||||
"xml" => true,
|
||||
_ => {
|
||||
warn!("Might be forgetting a file extension: {s}");
|
||||
false
|
||||
}
|
||||
}
|
||||
},
|
||||
None => false,
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user