From c656e65614b52772b4a6728e8621eae4f4e54313 Mon Sep 17 00:00:00 2001 From: mat Date: Wed, 20 Dec 2023 17:17:46 -0600 Subject: [PATCH] add calc answer --- Cargo.lock | 8 ++++ Cargo.toml | 2 + src/engines/answer.rs | 1 + src/engines/answer/calc.rs | 74 +++++++++++++++++++++++++++++++++ src/engines/answer/ip.rs | 2 +- src/engines/answer/useragent.rs | 4 +- src/engines/mod.rs | 52 ++++++++++++++++------- src/engines/search/google.rs | 4 +- src/normalize.rs | 10 +++-- src/parse.rs | 8 +++- src/web/assets/script.js | 4 +- src/web/assets/style.css | 29 +++++++++---- 12 files changed, 167 insertions(+), 31 deletions(-) create mode 100644 src/engines/answer/calc.rs diff --git a/Cargo.lock b/Cargo.lock index 60ce025..0cf59d3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -299,6 +299,12 @@ dependencies = [ "once_cell", ] +[[package]] +name = "fend-core" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e470ea3be6ce980f4d7f6cc08a6084e7715f2b052eeb1f123f2d4d8fb1d35de1" + [[package]] name = "fnv" version = "1.0.7" @@ -792,8 +798,10 @@ dependencies = [ "base64", "bytes", "eyre", + "fend-core", "futures", "html-escape", + "rand", "regex", "reqwest", "scraper", diff --git a/Cargo.toml b/Cargo.toml index d847b8d..6fdfd28 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,8 +12,10 @@ axum = { version = "0.7.2", features = ["http2"] } base64 = "0.21.5" bytes = "1.5.0" eyre = "0.6.11" +fend-core = "1.3.3" futures = "0.3.29" html-escape = "0.2.13" +rand = "0.8.5" regex = "1.10.2" reqwest = { version = "0.11.23", default-features = false, features = [ "rustls-tls", diff --git a/src/engines/answer.rs b/src/engines/answer.rs index f557f14..5b93353 100644 --- a/src/engines/answer.rs +++ b/src/engines/answer.rs @@ -1,3 +1,4 @@ +pub mod calc; pub mod ip; pub mod useragent; diff --git a/src/engines/answer/calc.rs b/src/engines/answer/calc.rs new file mode 100644 index 0000000..61aed79 --- /dev/null +++ b/src/engines/answer/calc.rs @@ -0,0 +1,74 @@ +use crate::engines::{EngineResponse, SearchQuery}; + +pub fn request(_client: &reqwest::Client, query: &SearchQuery) -> EngineResponse { + let query = query.query.as_str(); + + let Some(result_html) = evaluate(query, true) else { + return EngineResponse::new(); + }; + + EngineResponse::answer_html(format!( + r#"

{query} =

+

{result_html}

"#, + query = html_escape::encode_text(query), + )) +} + +pub fn request_autocomplete(_client: &reqwest::Client, query: &str) -> Vec { + let mut results = Vec::new(); + + if let Some(result) = evaluate(query, false) { + results.push(format!("{query} = {result}")); + } + + return results; +} + +fn evaluate(query: &str, html: bool) -> Option { + // at least 3 characters and not one of the short constants + if query.len() < 3 && !matches!(query.to_lowercase().as_str(), "pi" | "e" | "c") { + return None; + } + + let mut context = fend_core::Context::new(); + + // make lowercase f and c work + context.define_custom_unit_v1("f", "f", "°F", &fend_core::CustomUnitAttribute::Alias); + context.define_custom_unit_v1("c", "c", "°C", &fend_core::CustomUnitAttribute::Alias); + // make random work + context.set_random_u32_fn(|| rand::random::()); + if html { + // this makes it generate slightly nicer outputs for some queries like 2d6 + context.set_output_mode_terminal(); + } + + let Ok(result) = fend_core::evaluate(query, &mut context) else { + return None; + }; + let main_result = result.get_main_result(); + if main_result == query { + return None; + } + + if !html { + return Some(main_result.to_string()); + } + + let mut result_html = String::new(); + for span in result.get_main_result_spans() { + let class = match span.kind() { + fend_core::SpanKind::Number | fend_core::SpanKind::Boolean => "answer-calc-constant", + _ => "", + }; + if !class.is_empty() { + result_html.push_str(&format!( + r#"{text}"#, + text = html_escape::encode_text(span.string()) + )); + } else { + result_html.push_str(&html_escape::encode_text(span.string())); + } + } + + return Some(result_html); +} diff --git a/src/engines/answer/ip.rs b/src/engines/answer/ip.rs index 1997cf9..fe9d7b4 100644 --- a/src/engines/answer/ip.rs +++ b/src/engines/answer/ip.rs @@ -9,5 +9,5 @@ pub fn request(_client: &reqwest::Client, query: &SearchQuery) -> EngineResponse let ip = &query.ip; - EngineResponse::answer_html(format!("Your IP address is {ip}")) + EngineResponse::answer_html(format!(r#"

{ip}

"#)) } diff --git a/src/engines/answer/useragent.rs b/src/engines/answer/useragent.rs index f8bc844..5227d29 100644 --- a/src/engines/answer/useragent.rs +++ b/src/engines/answer/useragent.rs @@ -3,7 +3,7 @@ use crate::engines::{EngineResponse, SearchQuery}; use super::regex; pub fn request(_client: &reqwest::Client, query: &SearchQuery) -> EngineResponse { - if !regex!("^what('s|s| is) my (user ?agent|ua)|ua|user ?agent$") + if !regex!("^(what('s|s| is) my (user ?agent|ua)|ua|user ?agent)$") .is_match(&query.query.to_lowercase()) { return EngineResponse::new(); @@ -12,7 +12,7 @@ pub fn request(_client: &reqwest::Client, query: &SearchQuery) -> EngineResponse let user_agent = query.request_headers.get("user-agent"); EngineResponse::answer_html(if let Some(user_agent) = user_agent { - format!("Your user agent is {user_agent}") + format!("

{user_agent}

") } else { format!("You don't have a user agent") }) diff --git a/src/engines/mod.rs b/src/engines/mod.rs index cf4bc86..7c1a2d5 100644 --- a/src/engines/mod.rs +++ b/src/engines/mod.rs @@ -8,8 +8,6 @@ use std::{ use futures::future::join_all; use tokio::sync::mpsc; -use self::search::{bing, brave, google}; - pub mod answer; pub mod search; @@ -22,6 +20,7 @@ pub enum Engine { // answer Useragent, Ip, + Calc, } impl Engine { @@ -32,6 +31,7 @@ impl Engine { Engine::Brave, Engine::Useragent, Engine::Ip, + Engine::Calc, ] } @@ -42,6 +42,7 @@ impl Engine { Engine::Brave => "brave", Engine::Useragent => "useragent", Engine::Ip => "ip", + Engine::Calc => "calc", } } @@ -56,19 +57,20 @@ impl Engine { pub fn request(&self, client: &reqwest::Client, query: &SearchQuery) -> RequestResponse { match self { - Engine::Google => google::request(client, query).into(), - Engine::Bing => bing::request(client, query).into(), + Engine::Google => search::google::request(client, query).into(), + Engine::Bing => search::bing::request(client, query).into(), Engine::Brave => search::brave::request(client, query).into(), Engine::Useragent => answer::useragent::request(client, query).into(), Engine::Ip => answer::ip::request(client, query).into(), + Engine::Calc => answer::calc::request(client, query).into(), } } pub fn parse_response(&self, body: &str) -> eyre::Result { match self { - Engine::Google => google::parse_response(body), - Engine::Bing => bing::parse_response(body), - Engine::Brave => brave::parse_response(body), + Engine::Google => search::google::parse_response(body), + Engine::Bing => search::bing::parse_response(body), + Engine::Brave => search::brave::parse_response(body), _ => eyre::bail!("engine {self:?} can't parse response"), } } @@ -77,17 +79,18 @@ impl Engine { &self, client: &reqwest::Client, query: &str, - ) -> Option { + ) -> Option { match self { - Engine::Google => Some(google::request_autocomplete(client, query)), + Engine::Google => Some(search::google::request_autocomplete(client, query).into()), + Engine::Calc => Some(answer::calc::request_autocomplete(client, query).into()), _ => None, } } pub fn parse_autocomplete_response(&self, body: &str) -> eyre::Result> { match self { - Engine::Google => google::parse_autocomplete_response(body), - _ => Ok(Vec::new()), + Engine::Google => search::google::parse_autocomplete_response(body), + _ => eyre::bail!("engine {self:?} can't parse autocomplete response"), } } } @@ -110,7 +113,6 @@ pub enum RequestResponse { Http(reqwest::RequestBuilder), Instant(EngineResponse), } - impl From for RequestResponse { fn from(req: reqwest::RequestBuilder) -> Self { Self::Http(req) @@ -122,6 +124,21 @@ impl From for RequestResponse { } } +pub enum RequestAutocompleteResponse { + Http(reqwest::RequestBuilder), + Instant(Vec), +} +impl From for RequestAutocompleteResponse { + fn from(req: reqwest::RequestBuilder) -> Self { + Self::Http(req) + } +} +impl From> for RequestAutocompleteResponse { + fn from(res: Vec) -> Self { + Self::Instant(res) + } +} + #[derive(Debug)] pub struct EngineSearchResult { pub url: String, @@ -258,9 +275,14 @@ pub async fn autocomplete_with_client_and_engines( for engine in engines { if let Some(request) = engine.request_autocomplete(client, query) { requests.push(async { - let res = request.send().await?; - let body = res.text().await?; - let response = engine.parse_autocomplete_response(&body)?; + let response = match request { + RequestAutocompleteResponse::Http(request) => { + let res = request.send().await?; + let body = res.text().await?; + engine.parse_autocomplete_response(&body)? + } + RequestAutocompleteResponse::Instant(response) => response, + }; Ok((*engine, response)) }); } diff --git a/src/engines/search/google.rs b/src/engines/search/google.rs index 61f502a..c5ce030 100644 --- a/src/engines/search/google.rs +++ b/src/engines/search/google.rs @@ -27,7 +27,9 @@ pub fn parse_response(body: &str) -> eyre::Result { parse_html_response_with_opts( body, ParseOpts::new() - .result("div.g, div.xpd") + // xpd is weird, some results have it but it's usually used for ads? + // the :first-child filters out the ads though since for ads the first child is always a span + .result("div.g > div, div.xpd > div:first-child") .title("h3") .href("a[href]") .description("div[data-sncf], div[style='-webkit-line-clamp:2']") diff --git a/src/normalize.rs b/src/normalize.rs index 40289d0..75cf4bb 100644 --- a/src/normalize.rs +++ b/src/normalize.rs @@ -6,7 +6,10 @@ pub fn normalize_url(url: &str) -> eyre::Result { return Ok(String::new()); } - let mut url = Url::parse(url)?; + let Ok(mut url) = Url::parse(url) else { + eprintln!("failed to parse url: {url}"); + return Ok(url.to_string()); + }; // make sure the scheme is https if url.scheme() == "http" { @@ -22,11 +25,12 @@ pub fn normalize_url(url: &str) -> eyre::Result { url.set_path(path); } - // remove ref_src tracking param + // remove tracking params let query_pairs = url.query_pairs().into_owned(); let mut new_query_pairs = Vec::new(); + const TRACKING_PARAMS: &[&str] = &["ref_src", "_sm_au_"]; for (key, value) in query_pairs { - if key != "ref_src" { + if !TRACKING_PARAMS.contains(&key.as_str()) { new_query_pairs.push((key, value)); } } diff --git a/src/parse.rs b/src/parse.rs index e450d48..b45a11e 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -144,6 +144,12 @@ pub(super) fn parse_html_response_with_opts( let url = normalize_url(&url)?; let description = description_query_method.call(&result)?; + // this can happen on google if you search "roll d6" + let is_empty = description.is_empty() && title.is_empty(); + if is_empty { + continue; + } + search_results.push(EngineSearchResult { url, title, @@ -162,7 +168,7 @@ pub(super) fn parse_html_response_with_opts( let description = featured_snippet_description_query_method.call(&featured_snippet)?; // this can happen on google if you search "what's my user agent" - let is_empty = description.is_empty() && title.is_empty() && url.is_empty(); + let is_empty = description.is_empty() && title.is_empty(); if is_empty { None } else { diff --git a/src/web/assets/script.js b/src/web/assets/script.js index 0f4fb46..1cd6fe0 100644 --- a/src/web/assets/script.js +++ b/src/web/assets/script.js @@ -9,7 +9,9 @@ searchInputEl.insertAdjacentElement("afterend", datalistEl); searchInputEl.addEventListener("input", async (e) => { const value = e.target.value; - const res = await fetch(`/autocomplete?q=${value}`).then((res) => res.json()); + const res = await fetch(`/autocomplete?q=${encodeURIComponent(value)}`).then( + (res) => res.json() + ); const options = res[1]; datalistEl.innerHTML = ""; diff --git a/src/web/assets/style.css b/src/web/assets/style.css index da6afd1..e42d063 100644 --- a/src/web/assets/style.css +++ b/src/web/assets/style.css @@ -97,13 +97,6 @@ h1 { margin-bottom: 1rem; } -/* answer */ -.answer { - margin-bottom: 1rem; - border: 1px solid #234; - padding: 0.5rem; -} - /* progress update */ .progress-updates { margin-bottom: 1rem; @@ -122,3 +115,25 @@ h1 { color: #7fd962; font-weight: bold; } + +/* answer */ +.answer { + margin-bottom: 1rem; + border: 1px solid #234; + padding: 0.5rem; +} +.answer h3 { + margin: 0; + font-weight: normal; + font-size: 1.2rem; +} + +/* styles for specific answers */ +.answer-calc-query { + margin: 0; + opacity: 0.5; +} +.answer-calc-constant { + color: #d2a6ff; + white-space: pre; +}