use std::{ collections::{BTreeSet, HashMap}, fmt, net::IpAddr, ops::Deref, str::FromStr, sync::LazyLock, time::Instant, }; use futures::future::join_all; use tokio::sync::mpsc; pub mod answer; pub mod postsearch; pub mod search; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] pub enum Engine { // search Google, Bing, Brave, // answer Useragent, Ip, Calc, Wikipedia, // post-search StackOverflow, } impl Engine { pub fn all() -> &'static [Engine] { &[ Engine::Google, Engine::Bing, Engine::Brave, Engine::Useragent, Engine::Ip, Engine::Calc, Engine::Wikipedia, Engine::StackOverflow, ] } pub fn id(&self) -> &'static str { match self { Engine::Google => "google", Engine::Bing => "bing", Engine::Brave => "brave", Engine::Useragent => "useragent", Engine::Ip => "ip", Engine::Calc => "calc", Engine::Wikipedia => "wikipedia", Engine::StackOverflow => "stackoverflow", } } pub fn weight(&self) -> f64 { match self { Engine::Google => 1.05, Engine::Bing => 1., Engine::Brave => 1.25, _ => 1., } } pub fn request(&self, query: &SearchQuery) -> RequestResponse { match self { Engine::Google => search::google::request(query).into(), Engine::Bing => search::bing::request(query).into(), Engine::Brave => search::brave::request(query).into(), Engine::Useragent => answer::useragent::request(query).into(), Engine::Ip => answer::ip::request(query).into(), Engine::Calc => answer::calc::request(query).into(), Engine::Wikipedia => answer::wikipedia::request(query).into(), _ => RequestResponse::None, } } pub fn parse_response(&self, body: &str) -> eyre::Result { match self { Engine::Google => search::google::parse_response(body), Engine::Bing => search::bing::parse_response(body), Engine::Brave => search::brave::parse_response(body), Engine::Wikipedia => answer::wikipedia::parse_response(body), _ => eyre::bail!("engine {self:?} can't parse response"), } } pub fn request_autocomplete(&self, query: &str) -> Option { match self { Engine::Google => Some(search::google::request_autocomplete(query).into()), Engine::Calc => Some(answer::calc::request_autocomplete(query).into()), _ => None, } } pub fn parse_autocomplete_response(&self, body: &str) -> eyre::Result> { match self { Engine::Google => search::google::parse_autocomplete_response(body), _ => eyre::bail!("engine {self:?} can't parse autocomplete response"), } } pub fn postsearch_request(&self, response: &Response) -> Option { match self { Engine::StackOverflow => postsearch::stackoverflow::request(response), _ => None, } } pub fn postsearch_parse_response(&self, body: &str) -> Option { match self { Engine::StackOverflow => postsearch::stackoverflow::parse_response(body), _ => None, } } } impl fmt::Display for Engine { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.id()) } } pub struct SearchQuery { pub query: String, pub request_headers: HashMap, pub ip: String, } impl Deref for SearchQuery { type Target = str; fn deref(&self) -> &Self::Target { &self.query } } pub enum RequestResponse { None, Http(reqwest::RequestBuilder), Instant(EngineResponse), } impl From for RequestResponse { fn from(req: reqwest::RequestBuilder) -> Self { Self::Http(req) } } impl From for RequestResponse { fn from(res: EngineResponse) -> Self { Self::Instant(res) } } pub enum RequestAutocompleteResponse { Http(reqwest::RequestBuilder), Instant(Vec), } impl From for RequestAutocompleteResponse { fn from(req: reqwest::RequestBuilder) -> Self { Self::Http(req) } } impl From> for RequestAutocompleteResponse { fn from(res: Vec) -> Self { Self::Instant(res) } } #[derive(Debug)] pub struct EngineSearchResult { pub url: String, pub title: String, pub description: String, } #[derive(Debug)] pub struct EngineFeaturedSnippet { pub url: String, pub title: String, pub description: String, } #[derive(Debug, Default)] pub struct EngineResponse { pub search_results: Vec, pub featured_snippet: Option, pub answer_html: Option, pub infobox_html: Option, } impl EngineResponse { pub fn new() -> Self { Self::default() } pub fn answer_html(html: String) -> Self { Self { answer_html: Some(html), ..Default::default() } } pub fn infobox_html(html: String) -> Self { Self { infobox_html: Some(html), ..Default::default() } } } #[derive(Debug)] pub enum EngineProgressUpdate { Requesting, Downloading, Parsing, Done, } #[derive(Debug)] pub enum ProgressUpdateData { Engine { engine: Engine, update: EngineProgressUpdate, }, Response(Response), PostSearchInfobox(Infobox), } #[derive(Debug)] pub struct ProgressUpdate { pub data: ProgressUpdateData, pub time_ms: u64, } impl ProgressUpdate { pub fn new(data: ProgressUpdateData, start_time: Instant) -> Self { Self { data, time_ms: start_time.elapsed().as_millis() as u64, } } } pub async fn search_with_engines( engines: &[Engine], query: &SearchQuery, progress_tx: mpsc::UnboundedSender, ) -> eyre::Result<()> { let start_time = Instant::now(); let mut requests = Vec::new(); for engine in engines { requests.push(async { let engine = *engine; let request_response = engine.request(query).into(); let response = match request_response { RequestResponse::Http(request) => { progress_tx.send(ProgressUpdate::new( ProgressUpdateData::Engine { engine, update: EngineProgressUpdate::Requesting, }, start_time, ))?; let res = request.send().await?; progress_tx.send(ProgressUpdate::new( ProgressUpdateData::Engine { engine, update: EngineProgressUpdate::Downloading, }, start_time, ))?; let body = res.text().await?; progress_tx.send(ProgressUpdate::new( ProgressUpdateData::Engine { engine, update: EngineProgressUpdate::Parsing, }, start_time, ))?; let response = engine.parse_response(&body)?; progress_tx.send(ProgressUpdate::new( ProgressUpdateData::Engine { engine, update: EngineProgressUpdate::Done, }, start_time, ))?; response } RequestResponse::Instant(response) => response, RequestResponse::None => EngineResponse::new(), }; Ok((engine, response)) }); } let mut response_futures = Vec::new(); for request in requests { response_futures.push(request); } let responses_result: eyre::Result> = join_all(response_futures).await.into_iter().collect(); let responses = responses_result?; let response = merge_engine_responses(responses); let has_infobox = response.infobox.is_some(); progress_tx.send(ProgressUpdate::new( ProgressUpdateData::Response(response.clone()), start_time, ))?; if !has_infobox { // post-search let mut postsearch_requests = Vec::new(); for engine in engines { if let Some(request) = engine.postsearch_request(&response) { postsearch_requests.push(async { let response = match request.send().await { Ok(res) => { let body = res.text().await?; engine.postsearch_parse_response(&body) } Err(e) => { eprintln!("postsearch request error: {}", e); None } }; Ok((*engine, response)) }); } } let mut postsearch_response_futures = Vec::new(); for request in postsearch_requests { postsearch_response_futures.push(request); } let postsearch_responses_result: eyre::Result> = join_all(postsearch_response_futures) .await .into_iter() .collect(); let postsearch_responses = postsearch_responses_result?; for (engine, response) in postsearch_responses { if let Some(html) = response { progress_tx.send(ProgressUpdate::new( ProgressUpdateData::PostSearchInfobox(Infobox { html, engine }), start_time, ))?; } } } Ok(()) } pub async fn autocomplete_with_engines( engines: &[Engine], query: &str, ) -> eyre::Result> { let mut requests = Vec::new(); for engine in engines { if let Some(request) = engine.request_autocomplete(query) { requests.push(async { let response = match request { RequestAutocompleteResponse::Http(request) => { let res = request.send().await?; let body = res.text().await?; engine.parse_autocomplete_response(&body)? } RequestAutocompleteResponse::Instant(response) => response, }; Ok((*engine, response)) }); } } let mut autocomplete_futures = Vec::new(); for request in requests { autocomplete_futures.push(request); } let autocomplete_results_result: eyre::Result> = join_all(autocomplete_futures).await.into_iter().collect(); let autocomplete_results = autocomplete_results_result?; Ok(merge_autocomplete_responses(autocomplete_results)) } pub static CLIENT: LazyLock = LazyLock::new(|| { reqwest::ClientBuilder::new() .local_address(IpAddr::from_str("0.0.0.0").unwrap()) .build() .unwrap() }); pub async fn search( query: SearchQuery, progress_tx: mpsc::UnboundedSender, ) -> eyre::Result<()> { let engines = Engine::all(); search_with_engines(&engines, &query, progress_tx).await } pub async fn autocomplete(query: &str) -> eyre::Result> { let engines = Engine::all(); autocomplete_with_engines(&engines, query).await } #[derive(Debug, Clone)] pub struct Response { pub search_results: Vec, pub featured_snippet: Option, pub answer: Option, pub infobox: Option, } #[derive(Debug, Clone)] pub struct SearchResult { pub url: String, pub title: String, pub description: String, pub engines: BTreeSet, pub score: f64, } #[derive(Debug, Clone)] pub struct FeaturedSnippet { pub url: String, pub title: String, pub description: String, pub engine: Engine, } #[derive(Debug, Clone)] pub struct Answer { pub html: String, pub engine: Engine, } #[derive(Debug, Clone)] pub struct Infobox { pub html: String, pub engine: Engine, } fn merge_engine_responses(responses: HashMap) -> Response { let mut search_results: Vec = Vec::new(); let mut featured_snippet: Option = None; let mut answer: Option = None; let mut infobox: Option = None; for (engine, response) in responses { for (result_index, search_result) in response.search_results.into_iter().enumerate() { // position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a score of 0.33, etc. let base_result_score = 1. / (result_index + 1) as f64; let result_score = base_result_score * engine.weight(); if let Some(existing_result) = search_results .iter_mut() .find(|r| r.url == search_result.url) { // if the weight of this engine is higher than every other one then replace the title and description if engine.weight() > existing_result .engines .iter() .map(Engine::weight) .max_by(|a, b| a.partial_cmp(b).unwrap()) .unwrap_or(0.) { existing_result.title = search_result.title; existing_result.description = search_result.description; } existing_result.engines.insert(engine); existing_result.score += result_score; } else { search_results.push(SearchResult { url: search_result.url, title: search_result.title, description: search_result.description, engines: [engine].iter().cloned().collect(), score: result_score, }); } } if let Some(engine_featured_snippet) = response.featured_snippet { // if it has a higher weight than the current featured snippet let featured_snippet_weight = featured_snippet .as_ref() .map(|s| s.engine.weight()) .unwrap_or(0.); if engine.weight() > featured_snippet_weight { featured_snippet = Some(FeaturedSnippet { url: engine_featured_snippet.url, title: engine_featured_snippet.title, description: engine_featured_snippet.description, engine, }); } } if let Some(engine_answer_html) = response.answer_html { // if it has a higher weight than the current answer let answer_weight = answer.as_ref().map(|s| s.engine.weight()).unwrap_or(0.); if engine.weight() > answer_weight { answer = Some(Answer { html: engine_answer_html, engine, }); } } if let Some(engine_infobox_html) = response.infobox_html { // if it has a higher weight than the current infobox let infobox_weight = infobox.as_ref().map(|s| s.engine.weight()).unwrap_or(0.); if engine.weight() > infobox_weight { infobox = Some(Infobox { html: engine_infobox_html, engine, }); } } } search_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap()); Response { search_results, featured_snippet, answer, infobox, } } pub struct AutocompleteResult { pub query: String, pub score: f64, } fn merge_autocomplete_responses(responses: HashMap>) -> Vec { let mut autocomplete_results: Vec = Vec::new(); for (engine, response) in responses { for (result_index, autocomplete_result) in response.into_iter().enumerate() { // position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a score of 0.33, etc. let base_result_score = 1. / (result_index + 1) as f64; let result_score = base_result_score * engine.weight(); if let Some(existing_result) = autocomplete_results .iter_mut() .find(|r| r.query == autocomplete_result) { existing_result.score += result_score; } else { autocomplete_results.push(AutocompleteResult { query: autocomplete_result, score: result_score, }); } } } autocomplete_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap()); autocomplete_results.into_iter().map(|r| r.query).collect() }