de-enshitified file saving logic

This commit is contained in:
Rushmore75 2025-04-17 08:17:29 -06:00
parent 9bfa8f9108
commit 3497312fd4

View File

@ -1,30 +1,35 @@
use std::{ffi::OsStr, io::ErrorKind, path::PathBuf};
use reqwest::header::HeaderValue;
use tokio::fs;
use tracing::{error, trace};
use url::Url;
// NOTE(review): this span looks like a rendered diff hunk with the removed and
// added lines interleaved and the +/- markers stripped, so two versions of
// `as_path` are visible at once. The comments below say which version each
// line appears to belong to -- confirm against the actual commit.
//
/// Maps a URL to the on-disk path the downloaded resource is saved under.
///
/// The path is rooted at `./downloaded/`, followed by the URL's domain
/// (`"UnknownDomain"` when `url.domain()` is `None`) and the URL's path.
/// Presumably-old version: chooses between the URL's own file name and
/// `index.html` based on the path's extension.
/// Presumably-new version: uses the `Content-Type` header passed in as
/// `content_type` to decide whether `index.html` is appended.
pub fn as_path(url: &Url) -> PathBuf {
pub fn as_path(url: &Url, content_type: &HeaderValue) -> PathBuf {
// extract data from url to save it accurately
let url_path = PathBuf::from("./downloaded/".to_string() + url.domain().unwrap_or("UnknownDomain") + url.path());
let mut url_path = PathBuf::from("./downloaded/".to_string() + url.domain().unwrap_or("UnknownDomain") + url.path());
// if it's a file
// (presumably old version) `valid_file_extension` is not defined in this view;
// it is used as the predicate that decides whether the extension counts as a file.
let (basepath, filename) = if url_path.extension().filter(valid_file_extension).is_some() {
// get everything up till the file
// `ancestors()` yields `url_path` itself first, so skip(1).take(1) is its parent directory.
let basepath = url_path.ancestors().skip(1).take(1).collect::<PathBuf>();
// get the file name
// Panics if `file_name()` returns `None`.
let filename = url_path.file_name().expect("This should exist").to_string_lossy();
trace!("Save path: {:?} and base path: {:?}", &url_path, &basepath);
(basepath, filename.to_string())
// (presumably new version) If the header value cannot be viewed as a string,
// `to_str()` fails and the whole Content-Type check is skipped, leaving `url_path` as built above.
if let Ok(header) = content_type.to_str() {
// text/html; charset=UTF-8; option=value
// Keep only the media type: everything before the first ';', or the whole header if there is none.
let ttype = if let Some((t, _)) = header.split_once(';') {
t
} else {
// (presumably old version) no accepted extension: treat the whole path as a directory holding index.html.
(url_path.clone(), "index.html".into())
// (presumably new version) no parameters present, the header is the media type.
header
};
// (presumably old version) reassemble the final path as basepath/filename.
let mut path = PathBuf::new();
path = path.join(basepath);
path = path.join(filename);
// (presumably new version) split the media type into type and subtype at the first '/'.
if let Some((ttype, subtype)) = ttype.split_once('/') {
trace!("Found Content-Type to be: {ttype}/{subtype} for {}", url.to_string());
// If the Content-Type header is "*/html" (most likely "text/html") and the path's
// extension is anything but html:
// NOTE(review): both comparisons are exact. A subtype such as "HTML", or "html " followed by
// whitespace before the ';', does not match, and neither does an upper-case ".HTML" extension.
if subtype=="html" && !url_path.extension().is_some_and(|f| f=="html" || f=="htm" ) {
// time to slap a index.html to the end of that path there!
// NOTE(review): `join` adds "index.html" as a child component, so a path that already ends in
// a non-html extension (e.g. "page.php") becomes "page.php/index.html".
url_path = url_path.join("index.html");
}
}
}
trace!("Final path for {} is: {:?}", url, url_path);
// Tail expressions: `path` is presumably the old version's return value, `url_path` the new one's.
path
url_path
}
pub async fn init(filename: &PathBuf) -> Option<fs::File> {