meta-search/src/main.rs

113 lines
2.9 KiB
Rust
Raw Normal View History

2025-01-24 21:46:33 +00:00
/*
Search backends this service targets:
html.duckduckgo.com
google.com
*/
use std::fs;
use reqwest::{header::HeaderMap, Client, Method};
use rocket::{form::Form, fs::FileServer, post, routes, FromForm};
use rocket_dyn_templates::{context, Template};
use scraper::{Html, Selector};
use serde::Serialize;
/// One scraped search hit, serialized into the `results` template context.
///
/// `ddg` fills `title` and `desc` with `inner_html()` output, so both hold
/// markup rather than plain text.
/// NOTE(review): confirm the template escapes or sanitizes them as intended.
#[derive(Serialize)]
struct SearchResult {
/// Link target: the `href` attribute of the result's title anchor.
a: String,
/// Inner HTML of the result's snippet element.
desc: String,
/// Inner HTML of the result's title anchor.
title: String,
}
/// Form payload accepted by the `search` route.
#[derive(FromForm)]
struct Query {
/// Search text as submitted; `search` trims surrounding whitespace before use.
/// Presumably the form key is this field's name (`query`) via `FromForm` — confirm.
query: String
}
/// Placeholder for a Google backend with the same signature as `ddg`.
///
/// # Panics
///
/// Always panics: the body is an unimplemented `todo!()` stub, so this must
/// not be awaited until it is filled in.
async fn google(client: &Client, search: &str) -> Vec<SearchResult> {
todo!()
}
async fn ddg(client: &Client, search: &str) -> Vec<SearchResult> {
// TODO url encode search
let mut headers = HeaderMap::new();
headers.insert("content-type", "application/x-www-form-urlencoded".parse().unwrap());
headers.insert("Accept-Language", "en-US,en;q=0.9".parse().unwrap());
headers.insert("Referer", "https://html.duckduckgo.com/".parse().unwrap());
let request = client
.request(Method::POST, "https://html.duckduckgo.com/html/")
.headers(headers)
.body(format!("q={}", search))
.build()
.unwrap();
let ddg_dom = client.execute(request)
.await
.unwrap()
.text()
.await
.unwrap();
fs::write("ddg-debug.html", &ddg_dom).unwrap();
let html = Html::parse_document(&ddg_dom);
let search_result = Selector::parse("#links > .results_links > .result__body ").unwrap();
let bot_detected = Selector::parse("anomaly-modal__mask").unwrap();
let count = html.select(&bot_detected).into_iter().count();
if count > 0 {
// we've been found out!
println!("bot");
}
let mut buf = Vec::new();
for result in html.select(&search_result) {
let title_selector = Selector::parse("h2 > a").unwrap();
let title = result.select(&title_selector).next().expect("Failed to get title");
let preview_selector = Selector::parse(".result__snippet").unwrap();
let preview = result.select(&preview_selector).next().expect("Failed to get title");
let a = title.attr("href").unwrap().to_string();
let title = title.inner_html();
let desc = preview.inner_html();
let sr = SearchResult {
a,
title,
desc
};
buf.push(sr);
}
buf
}
#[post("/", data="<input>")]
async fn search(input: Form<Query>) -> Template {
let clean = &input.query.trim();
let client = reqwest::ClientBuilder::new()
.user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36")
.gzip(true)
.build()
.unwrap();
Template::render("results", context! {
ddg: ddg(&client, clean).await,
last_search: clean
})
}
/// Builds and launches the Rocket server.
///
/// Mounts the `search` route and a static file server for the `www` directory
/// at `/`, and attaches the template fairing.
///
/// # Panics
///
/// Panics if Rocket fails to launch or exits with an error.
#[rocket::main]
async fn main() {
    let app = rocket::build()
        .mount("/", routes![search])
        .mount("/", FileServer::from("www"))
        .attach(Template::fairing());

    app.launch().await.unwrap();
}