From bcf42a733ac8e541abe91e40c13afefe1c32fc6b Mon Sep 17 00:00:00 2001 From: mat Date: Wed, 20 Dec 2023 04:08:36 -0600 Subject: [PATCH] add instant answers --- Cargo.lock | 1 + Cargo.toml | 1 + src/engines/answer.rs | 10 +++ src/engines/answer/ip.rs | 13 +++ src/engines/answer/useragent.rs | 19 +++++ src/engines/mod.rs | 145 ++++++++++++++++++++++++++------ src/parse.rs | 2 + src/web/assets/script.js | 2 - src/web/assets/style.css | 7 ++ src/web/autocomplete.rs | 2 +- src/web/mod.rs | 9 +- src/web/search.rs | 37 ++++++-- 12 files changed, 211 insertions(+), 37 deletions(-) create mode 100644 src/engines/answer.rs create mode 100644 src/engines/answer/ip.rs create mode 100644 src/engines/answer/useragent.rs diff --git a/Cargo.lock b/Cargo.lock index e3cb8c2..60ce025 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -794,6 +794,7 @@ dependencies = [ "eyre", "futures", "html-escape", + "regex", "reqwest", "scraper", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 087a8dc..d847b8d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,7 @@ bytes = "1.5.0" eyre = "0.6.11" futures = "0.3.29" html-escape = "0.2.13" +regex = "1.10.2" reqwest = { version = "0.11.23", default-features = false, features = [ "rustls-tls", ] } diff --git a/src/engines/answer.rs b/src/engines/answer.rs new file mode 100644 index 0000000..f557f14 --- /dev/null +++ b/src/engines/answer.rs @@ -0,0 +1,10 @@ +pub mod ip; +pub mod useragent; + +macro_rules! regex { + ($re:literal $(,)?) => {{ + static RE: std::sync::OnceLock = std::sync::OnceLock::new(); + RE.get_or_init(|| regex::Regex::new($re).unwrap()) + }}; +} +pub(crate) use regex; diff --git a/src/engines/answer/ip.rs b/src/engines/answer/ip.rs new file mode 100644 index 0000000..1997cf9 --- /dev/null +++ b/src/engines/answer/ip.rs @@ -0,0 +1,13 @@ +use crate::engines::{EngineResponse, SearchQuery}; + +use super::regex; + +pub fn request(_client: &reqwest::Client, query: &SearchQuery) -> EngineResponse { + if !regex!("^what('s|s| is) my ip").is_match(&query.query.to_lowercase()) { + return EngineResponse::new(); + } + + let ip = &query.ip; + + EngineResponse::answer_html(format!("Your IP address is {ip}")) +} diff --git a/src/engines/answer/useragent.rs b/src/engines/answer/useragent.rs new file mode 100644 index 0000000..f8bc844 --- /dev/null +++ b/src/engines/answer/useragent.rs @@ -0,0 +1,19 @@ +use crate::engines::{EngineResponse, SearchQuery}; + +use super::regex; + +pub fn request(_client: &reqwest::Client, query: &SearchQuery) -> EngineResponse { + if !regex!("^what('s|s| is) my (user ?agent|ua)|ua|user ?agent$") + .is_match(&query.query.to_lowercase()) + { + return EngineResponse::new(); + } + + let user_agent = query.request_headers.get("user-agent"); + + EngineResponse::answer_html(if let Some(user_agent) = user_agent { + format!("Your user agent is {user_agent}") + } else { + format!("You don't have a user agent") + }) +} diff --git a/src/engines/mod.rs b/src/engines/mod.rs index 1f7f4f5..e721aaa 100644 --- a/src/engines/mod.rs +++ b/src/engines/mod.rs @@ -1,5 +1,6 @@ use std::{ collections::{BTreeSet, HashMap}, + ops::Deref, sync::LazyLock, time::Instant, }; @@ -9,18 +10,29 @@ use tokio::sync::mpsc; use self::search::{bing, brave, google}; +pub mod answer; pub mod search; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] pub enum Engine { + // search Google, Bing, Brave, + // answer + Useragent, + Ip, } impl Engine { pub fn all() -> &'static [Engine] { - &[Engine::Google, Engine::Bing, Engine::Brave] + &[ + Engine::Google, + Engine::Bing, + Engine::Brave, + Engine::Useragent, + Engine::Ip, + ] } pub fn id(&self) -> &'static str { @@ -28,6 +40,8 @@ impl Engine { Engine::Google => "google", Engine::Bing => "bing", Engine::Brave => "brave", + Engine::Useragent => "useragent", + Engine::Ip => "ip", } } @@ -36,14 +50,17 @@ impl Engine { Engine::Google => 1.05, Engine::Bing => 1., Engine::Brave => 1.25, + _ => 1., } } - pub fn request(&self, client: &reqwest::Client, query: &str) -> reqwest::RequestBuilder { + pub fn request(&self, client: &reqwest::Client, query: &SearchQuery) -> RequestResponse { match self { - Engine::Google => google::request(client, query), - Engine::Bing => bing::request(client, query), - Engine::Brave => brave::request(client, query), + Engine::Google => google::request(client, query).into(), + Engine::Bing => bing::request(client, query).into(), + Engine::Brave => search::brave::request(client, query).into(), + Engine::Useragent => answer::useragent::request(client, query).into(), + Engine::Ip => answer::ip::request(client, query).into(), } } @@ -52,6 +69,7 @@ impl Engine { Engine::Google => google::parse_response(body), Engine::Bing => bing::parse_response(body), Engine::Brave => brave::parse_response(body), + _ => eyre::bail!("engine {self:?} can't parse response"), } } @@ -74,6 +92,36 @@ impl Engine { } } +pub struct SearchQuery { + pub query: String, + pub request_headers: HashMap, + pub ip: String, +} + +impl Deref for SearchQuery { + type Target = str; + + fn deref(&self) -> &Self::Target { + &self.query + } +} + +pub enum RequestResponse { + Http(reqwest::RequestBuilder), + Instant(EngineResponse), +} + +impl From for RequestResponse { + fn from(req: reqwest::RequestBuilder) -> Self { + Self::Http(req) + } +} +impl From for RequestResponse { + fn from(res: EngineResponse) -> Self { + Self::Instant(res) + } +} + #[derive(Debug)] pub struct EngineSearchResult { pub url: String, @@ -88,10 +136,24 @@ pub struct EngineFeaturedSnippet { pub description: String, } -#[derive(Debug)] +#[derive(Debug, Default)] pub struct EngineResponse { pub search_results: Vec, pub featured_snippet: Option, + pub answer_html: Option, +} + +impl EngineResponse { + pub fn new() -> Self { + Self::default() + } + + pub fn answer_html(html: String) -> Self { + Self { + answer_html: Some(html), + ..Default::default() + } + } } #[derive(Debug)] @@ -122,7 +184,7 @@ impl ProgressUpdate { pub async fn search_with_client_and_engines( client: &reqwest::Client, engines: &[Engine], - query: &str, + query: &SearchQuery, progress_tx: mpsc::UnboundedSender, ) -> eyre::Result { let start_time = Instant::now(); @@ -137,29 +199,38 @@ pub async fn search_with_client_and_engines( start_time, ))?; - let res = engine.request(client, query).send().await?; + let request_response = engine.request(client, query).into(); - progress_tx.send(ProgressUpdate::new( - ProgressUpdateKind::Downloading, - engine, - start_time, - ))?; + let response = match request_response { + RequestResponse::Http(request) => { + let res = request.send().await?; - let body = res.text().await?; + progress_tx.send(ProgressUpdate::new( + ProgressUpdateKind::Downloading, + engine, + start_time, + ))?; - progress_tx.send(ProgressUpdate::new( - ProgressUpdateKind::Parsing, - engine, - start_time, - ))?; + let body = res.text().await?; - let response = engine.parse_response(&body)?; + progress_tx.send(ProgressUpdate::new( + ProgressUpdateKind::Parsing, + engine, + start_time, + ))?; - progress_tx.send(ProgressUpdate::new( - ProgressUpdateKind::Done, - engine, - start_time, - ))?; + let response = engine.parse_response(&body)?; + + progress_tx.send(ProgressUpdate::new( + ProgressUpdateKind::Done, + engine, + start_time, + ))?; + + response + } + RequestResponse::Instant(response) => response, + }; Ok((engine, response)) }); @@ -209,11 +280,11 @@ pub async fn autocomplete_with_client_and_engines( static CLIENT: LazyLock = LazyLock::new(|| reqwest::Client::new()); pub async fn search( - query: &str, + query: SearchQuery, progress_tx: mpsc::UnboundedSender, ) -> eyre::Result { let engines = Engine::all(); - search_with_client_and_engines(&CLIENT, &engines, query, progress_tx).await + search_with_client_and_engines(&CLIENT, &engines, &query, progress_tx).await } pub async fn autocomplete(query: &str) -> eyre::Result> { @@ -225,6 +296,7 @@ pub async fn autocomplete(query: &str) -> eyre::Result> { pub struct Response { pub search_results: Vec, pub featured_snippet: Option, + pub answer: Option, } #[derive(Debug)] @@ -244,9 +316,16 @@ pub struct FeaturedSnippet { pub engine: Engine, } +#[derive(Debug)] +pub struct Answer { + pub html: String, + pub engine: Engine, +} + fn merge_engine_responses(responses: HashMap) -> Response { let mut search_results: Vec = Vec::new(); let mut featured_snippet: Option = None; + let mut answer: Option = None; for (engine, response) in responses { for (result_index, search_result) in response.search_results.into_iter().enumerate() { @@ -299,6 +378,17 @@ fn merge_engine_responses(responses: HashMap) -> Respons }); } } + + if let Some(engine_answer_html) = response.answer_html { + // if it has a higher weight than the current answer + let answer_weight = answer.as_ref().map(|s| s.engine.weight()).unwrap_or(0.); + if engine.weight() > answer_weight { + answer = Some(Answer { + html: engine_answer_html, + engine, + }); + } + } } search_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap()); @@ -306,6 +396,7 @@ fn merge_engine_responses(responses: HashMap) -> Respons Response { search_results, featured_snippet, + answer, } } diff --git a/src/parse.rs b/src/parse.rs index 27bec38..428fc1d 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -176,5 +176,7 @@ pub(super) fn parse_html_response_with_opts( Ok(EngineResponse { search_results, featured_snippet, + // this field is used by instant answers, not normal search engines + answer_html: None, }) } diff --git a/src/web/assets/script.js b/src/web/assets/script.js index ed4aa30..0f4fb46 100644 --- a/src/web/assets/script.js +++ b/src/web/assets/script.js @@ -12,8 +12,6 @@ searchInputEl.addEventListener("input", async (e) => { const res = await fetch(`/autocomplete?q=${value}`).then((res) => res.json()); const options = res[1]; - console.log(options); - datalistEl.innerHTML = ""; options.forEach((option) => { const optionEl = document.createElement("option"); diff --git a/src/web/assets/style.css b/src/web/assets/style.css index 1ae92e4..da6afd1 100644 --- a/src/web/assets/style.css +++ b/src/web/assets/style.css @@ -97,6 +97,13 @@ h1 { margin-bottom: 1rem; } +/* answer */ +.answer { + margin-bottom: 1rem; + border: 1px solid #234; + padding: 0.5rem; +} + /* progress update */ .progress-updates { margin-bottom: 1rem; diff --git a/src/web/autocomplete.rs b/src/web/autocomplete.rs index b69d8bf..4cdce19 100644 --- a/src/web/autocomplete.rs +++ b/src/web/autocomplete.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use axum::{extract::Query, http::StatusCode, response::IntoResponse, Json}; -use crate::engines::{self, ProgressUpdate, ProgressUpdateKind, Response}; +use crate::engines; pub async fn route(Query(params): Query>) -> impl IntoResponse { let query = params diff --git a/src/web/mod.rs b/src/web/mod.rs index a55f3e5..94c45fb 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -1,6 +1,8 @@ pub mod autocomplete; pub mod search; +use std::net::SocketAddr; + use axum::{http::header, routing::get, Router}; pub const BIND_ADDRESS: &str = "[::]:3000"; @@ -40,5 +42,10 @@ pub async fn run() { println!("Listening on {BIND_ADDRESS}"); let listener = tokio::net::TcpListener::bind(BIND_ADDRESS).await.unwrap(); - axum::serve(listener, app).await.unwrap(); + axum::serve( + listener, + app.into_make_service_with_connect_info::(), + ) + .await + .unwrap(); } diff --git a/src/web/search.rs b/src/web/search.rs index 688b420..27f4978 100644 --- a/src/web/search.rs +++ b/src/web/search.rs @@ -1,16 +1,16 @@ -use std::collections::HashMap; +use std::{collections::HashMap, net::SocketAddr}; use async_stream::stream; use axum::{ body::Body, - extract::Query, - http::{header, StatusCode}, + extract::{ConnectInfo, Query}, + http::{header, HeaderMap, StatusCode}, response::IntoResponse, }; use bytes::Bytes; use html_escape::{encode_text, encode_unquoted_attribute}; -use crate::engines::{self, ProgressUpdate, ProgressUpdateKind, Response}; +use crate::engines::{self, ProgressUpdate, ProgressUpdateKind, Response, SearchQuery}; fn render_beginning_of_html(query: &str) -> String { format!( @@ -91,6 +91,13 @@ fn render_featured_snippet(featured_snippet: &engines::FeaturedSnippet) -> Strin fn render_results(response: Response) -> String { let mut html = String::new(); + if let Some(answer) = response.answer { + html.push_str(&format!( + r#"
{answer_html}{engines_html}
"#, + answer_html = &answer.html, + engines_html = render_engine_list(&[answer.engine]) + )); + } if let Some(featured_snippet) = response.featured_snippet { html.push_str(&render_featured_snippet(&featured_snippet)); } @@ -116,7 +123,11 @@ fn render_progress_update(progress_update: &ProgressUpdate) -> String { ) } -pub async fn route(Query(params): Query>) -> impl IntoResponse { +pub async fn route( + Query(params): Query>, + headers: HeaderMap, + ConnectInfo(addr): ConnectInfo, +) -> impl IntoResponse { let query = params .get("q") .cloned() @@ -135,6 +146,20 @@ pub async fn route(Query(params): Query>) -> impl IntoRe ); } + let query = SearchQuery { + query, + request_headers: headers + .into_iter() + .map(|(k, v)| { + ( + k.map(|k| k.to_string()).unwrap_or_default(), + v.to_str().unwrap_or_default().to_string(), + ) + }) + .collect(), + ip: addr.ip().to_string(), + }; + let s = stream! { type R = Result; @@ -142,7 +167,7 @@ pub async fn route(Query(params): Query>) -> impl IntoRe let (progress_tx, mut progress_rx) = tokio::sync::mpsc::unbounded_channel(); - let search_future = tokio::spawn(async move { engines::search(&query, progress_tx).await }); + let search_future = tokio::spawn(async move { engines::search(query, progress_tx).await }); while let Some(progress_update) = progress_rx.recv().await { let progress_html = format!(