From 569922aab75325261166a94df0e83afe8d57afbf Mon Sep 17 00:00:00 2001 From: Shrecknt <58538423+Shrecknt@users.noreply.github.com> Date: Mon, 15 Apr 2024 19:49:40 -0700 Subject: [PATCH] more more quick answers * notepad * fix config * mdn docs * search bar fix * remove unnecessary check * notepad fix * make config avaliable to all stages of request * make config more consistent * minecraft wiki infobox * cleanup * engine_list_separator fix * are you happy now mta * change Arc to &Config in places --------- Co-authored-by: mat --- default-config.toml | 9 ++- src/config.rs | 12 ++-- src/engines/answer/dictionary.rs | 4 +- src/engines/answer/notepad.rs | 7 +- src/engines/mod.rs | 26 +++++-- src/engines/postsearch.rs | 1 + src/engines/postsearch/docs_rs.rs | 2 +- src/engines/postsearch/mdn.rs | 31 +++++++-- src/engines/postsearch/minecraft_wiki.rs | 88 ++++++++++++++++++++++++ src/web/assets/script.js | 3 +- src/web/assets/style.css | 21 ++++++ src/web/search.rs | 36 ++++++---- 12 files changed, 208 insertions(+), 32 deletions(-) create mode 100644 src/engines/postsearch/minecraft_wiki.rs diff --git a/default-config.toml b/default-config.toml index a74cac3..a926a75 100644 --- a/default-config.toml +++ b/default-config.toml @@ -1,11 +1,13 @@ bind = "0.0.0.0:28019" +engine_list_separator = false + [engines] google = { weight = 1.05 } bing = { weight = 1.0 } brave = { weight = 1.25 } -google-scholar = { enabled = false, weight = 0.50 } +google_scholar = { enabled = false, weight = 0.50 } rightdao = { enabled = false, weight = 0.10 } stract = { enabled = false, weight = 0.15 } yep = { enabled = false, weight = 0.10 } @@ -17,3 +19,8 @@ fend = { enabled = false, weight = 10 } [engines.marginalia] args = { profile = "corpo", js = "default", adtech = "default" } weight = 0.15 + +[engines.mdn] +# the number of sections of text to display +# 1 is just the summary and 0 is no limit +max_sections = 1 diff --git a/src/config.rs b/src/config.rs index 5cf575a..e545066 100644 --- a/src/config.rs +++ b/src/config.rs @@ -6,9 +6,11 @@ use tracing::info; use crate::engines::Engine; -#[derive(Deserialize)] +#[derive(Deserialize, Debug)] pub struct Config { pub bind: SocketAddr, + #[serde(default)] + pub engine_list_separator: Option, pub engines: EnginesConfig, } @@ -34,6 +36,8 @@ impl Config { // use the default for something. pub fn update(&mut self, other: Self) { self.bind = other.bind; + self.engine_list_separator = self.engine_list_separator.or(other.engine_list_separator); + assert_ne!(self.engine_list_separator, None); for (key, value) in other.engines.map { if let Some(existing) = self.engines.map.get_mut(&key) { existing.update(value); @@ -44,7 +48,7 @@ impl Config { } } -#[derive(Deserialize)] +#[derive(Deserialize, Debug)] pub struct EnginesConfig { #[serde(flatten)] pub map: HashMap, @@ -76,7 +80,7 @@ impl EnginesConfig { } } -#[derive(Deserialize, Clone)] +#[derive(Deserialize, Clone, Debug)] #[serde(untagged)] pub enum DefaultableEngineConfig { Boolean(bool), @@ -99,7 +103,7 @@ impl Default for DefaultableEngineConfig { } } -#[derive(Deserialize, Clone)] +#[derive(Deserialize, Clone, Debug)] pub struct FullEngineConfig { #[serde(default = "default_true")] pub enabled: bool, diff --git a/src/engines/answer/dictionary.rs b/src/engines/answer/dictionary.rs index 8533b45..db432dd 100644 --- a/src/engines/answer/dictionary.rs +++ b/src/engines/answer/dictionary.rs @@ -50,7 +50,9 @@ pub struct WiktionaryDefinition { pub examples: Vec, } -pub fn parse_response(HttpResponse { res, body }: &HttpResponse) -> eyre::Result { +pub fn parse_response( + HttpResponse { res, body, .. }: &HttpResponse, +) -> eyre::Result { let url = res.url(); let Ok(res) = serde_json::from_str::(body) else { diff --git a/src/engines/answer/notepad.rs b/src/engines/answer/notepad.rs index 0e543db..425dc12 100644 --- a/src/engines/answer/notepad.rs +++ b/src/engines/answer/notepad.rs @@ -7,7 +7,8 @@ pub fn request(query: &SearchQuery) -> EngineResponse { return EngineResponse::new(); } - EngineResponse::answer_html( - r#"
"#.to_string() - ) + // This allows pasting styles which is undesired behavior, and the + // `contenteditable="plaintext-only"` attribute currently only works on Chrome. + // This should be updated when the attribute becomes available in more browsers + EngineResponse::answer_html(r#"
"#.to_string()) } diff --git a/src/engines/mod.rs b/src/engines/mod.rs index 715ef14..10691dd 100644 --- a/src/engines/mod.rs +++ b/src/engines/mod.rs @@ -28,7 +28,7 @@ pub mod search; engines! { // search Google = "google", - GoogleScholar = "google-scholar", + GoogleScholar = "google_scholar", Bing = "bing", Brave = "brave", Marginalia = "marginalia", @@ -49,6 +49,7 @@ engines! { DocsRs = "docs_rs", GitHub = "github", Mdn = "mdn", + MinecraftWiki = "minecraft_wiki", StackExchange = "stackexchange", } @@ -84,6 +85,7 @@ engine_postsearch_requests! { DocsRs => postsearch::docs_rs::request, parse_response, GitHub => postsearch::github::request, parse_response, Mdn => postsearch::mdn::request, parse_response, + MinecraftWiki => postsearch::minecraft_wiki::request, parse_response, StackExchange => postsearch::stackexchange::request, parse_response, } @@ -154,6 +156,7 @@ impl From> for RequestAutocompleteResponse { pub struct HttpResponse { pub res: reqwest::Response, pub body: String, + pub config: Arc, } impl<'a> From<&'a HttpResponse> for &'a str { @@ -302,7 +305,11 @@ pub async fn search( start_time, ))?; - let http_response = HttpResponse { res, body }; + let http_response = HttpResponse { + res, + body, + config: query.config.clone(), + }; let response = match engine.parse_response(&http_response) { Ok(response) => response, @@ -339,7 +346,7 @@ pub async fn search( join_all(response_futures).await.into_iter().collect(); let responses = responses_result?; - let response = merge_engine_responses(&query.config, responses); + let response = merge_engine_responses(query.config.clone(), responses); let has_infobox = response.infobox.is_some(); @@ -368,7 +375,11 @@ pub async fn search( } let body = String::from_utf8_lossy(&body_bytes).to_string(); - let http_response = HttpResponse { res, body }; + let http_response = HttpResponse { + res, + body, + config: query.config.clone(), + }; engine.postsearch_parse_response(&http_response) } Err(e) => { @@ -464,6 +475,7 @@ pub struct Response { pub featured_snippet: Option, pub answer: Option, pub infobox: Option, + pub config: Arc, } #[derive(Debug, Clone)] @@ -495,7 +507,10 @@ pub struct Infobox { pub engine: Engine, } -fn merge_engine_responses(config: &Config, responses: HashMap) -> Response { +fn merge_engine_responses( + config: Arc, + responses: HashMap, +) -> Response { let mut search_results: Vec = Vec::new(); let mut featured_snippet: Option = None; let mut answer: Option = None; @@ -596,6 +611,7 @@ fn merge_engine_responses(config: &Config, responses: HashMap Option { None } -pub fn parse_response(HttpResponse { res, body }: &HttpResponse) -> Option { +pub fn parse_response(HttpResponse { res, body, .. }: &HttpResponse) -> Option { let url = res.url().clone(); let dom = Html::parse_document(body); diff --git a/src/engines/postsearch/mdn.rs b/src/engines/postsearch/mdn.rs index f391656..99219bf 100644 --- a/src/engines/postsearch/mdn.rs +++ b/src/engines/postsearch/mdn.rs @@ -1,6 +1,13 @@ use scraper::{Html, Selector}; +use serde::Deserialize; +use tracing::error; -use crate::engines::{HttpResponse, Response, CLIENT}; +use crate::engines::{Engine, HttpResponse, Response, CLIENT}; + +#[derive(Deserialize)] +pub struct MdnConfig { + pub max_sections: usize, +} pub fn request(response: &Response) -> Option { for search_result in response.search_results.iter().take(8) { @@ -15,7 +22,16 @@ pub fn request(response: &Response) -> Option { None } -pub fn parse_response(HttpResponse { res, body }: &HttpResponse) -> Option { +pub fn parse_response(HttpResponse { res, body, config }: &HttpResponse) -> Option { + let config_toml = config.engines.get(Engine::Mdn).extra.clone(); + let config: MdnConfig = match toml::Value::Table(config_toml).try_into() { + Ok(args) => args, + Err(err) => { + error!("Failed to parse Mdn config: {err}"); + return None; + } + }; + let url = res.url().clone(); let dom = Html::parse_document(body); @@ -30,11 +46,18 @@ pub fn parse_response(HttpResponse { res, body }: &HttpResponse) -> Option>() + .join("
"); let doc_html = ammonia::Builder::default() .link_rel(None) diff --git a/src/engines/postsearch/minecraft_wiki.rs b/src/engines/postsearch/minecraft_wiki.rs new file mode 100644 index 0000000..8b565be --- /dev/null +++ b/src/engines/postsearch/minecraft_wiki.rs @@ -0,0 +1,88 @@ +use scraper::{ElementRef, Html, Selector}; + +use crate::engines::{HttpResponse, Response, CLIENT}; + +pub fn request(response: &Response) -> Option { + for search_result in response.search_results.iter().take(8) { + if search_result.url.starts_with("https://minecraft.wiki/w/") { + return Some(CLIENT.get(search_result.url.as_str())); + } + } + + None +} + +pub fn parse_response(HttpResponse { res, body, .. }: &HttpResponse) -> Option { + let url = res.url().clone(); + + let dom = Html::parse_document(body); + + let page_title = dom + .select(&Selector::parse("#firstHeading").unwrap()) + .next()? + .text() + .collect::() + .trim() + .to_string(); + + let doc_query = Selector::parse(".mw-parser-output").unwrap(); + + let doc_html = dom + .select(&doc_query) + .next() + .map(|doc| strip_gallery(doc)) + .unwrap_or_default() + .join(""); + + let doc_html = ammonia::Builder::default() + .link_rel(None) + .add_allowed_classes("div", ["notaninfobox", "mcw-mainpage-icon"]) + .add_allowed_classes("pre", ["noexcerpt", "navigation-not-searchable"]) + .url_relative(ammonia::UrlRelative::RewriteWithBase(url.clone())) + .clean(&doc_html) + .to_string(); + + let title_html = format!( + r#"

{title}

"#, + url = html_escape::encode_quoted_attribute(&url.to_string()), + title = html_escape::encode_safe(&page_title), + ); + + Some(format!( + r#"{title_html}
{doc_html}
"# + )) +} + +fn strip_gallery(doc: ElementRef) -> Vec { + let mut gallery = false; + doc.children() + .filter(|elem| { + let value = elem.value(); + if gallery { + return false; + } + match value { + scraper::Node::Element(_) => { + let elem = ElementRef::wrap(*elem).unwrap(); + let is_gallery_title = elem.first_child().map_or(false, |elem| { + elem.value().as_element().map_or(false, |_| { + let elem = ElementRef::wrap(elem).unwrap(); + elem.text().collect::() == "Gallery" + }) + }); + if is_gallery_title { + gallery = true; + return false; + } + true + } + _ => true, + } + }) + .map(|elem| { + ElementRef::wrap(elem) + .map(|elem| elem.html()) + .unwrap_or_default() + }) + .collect() +} diff --git a/src/web/assets/script.js b/src/web/assets/script.js index c08d94d..a74377d 100644 --- a/src/web/assets/script.js +++ b/src/web/assets/script.js @@ -113,7 +113,8 @@ document.addEventListener("keydown", (e) => { const focusedEl = document.querySelector(":focus"); if ( focusedEl && - (focusedEl.tagName == "input" || + (focusedEl.tagName.toLowerCase() == "input" || + focusedEl.tagName.toLowerCase() == "textarea" || focusedEl.getAttribute("contenteditable") !== null) ) return; diff --git a/src/web/assets/style.css b/src/web/assets/style.css index fce2b68..3952799 100644 --- a/src/web/assets/style.css +++ b/src/web/assets/style.css @@ -292,6 +292,18 @@ h3.answer-thesaurus-category-title { .answer-thesaurus-list a { text-decoration: underline; } +.answer-notepad { + width: calc( 100% - 4px ); + height: fit-content; + overflow-y: show; + background-color: transparent; + color: white; + border: none; + outline: none; + min-height: 4em; + font-size: 12px; + resize: none; +} /* infobox */ .infobox { @@ -353,3 +365,12 @@ h3.answer-thesaurus-category-title { .postsearch-infobox p { margin-bottom: 1em; } +.infobox-minecraft_wiki-article > .notaninfobox { + display: none !important; +} +.noexcerpt, .navigation-not-searchable { + display: none !important; +} +.mcw-mainpage-icon { + display: inline-block; +} \ No newline at end of file diff --git a/src/web/search.rs b/src/web/search.rs index a1aab5c..5ebe4d4 100644 --- a/src/web/search.rs +++ b/src/web/search.rs @@ -45,18 +45,29 @@ fn render_end_of_html() -> String { r"".to_string() } -fn render_engine_list(engines: &[engines::Engine]) -> String { +fn render_engine_list(engines: &[engines::Engine], config: &Config) -> String { let mut html = String::new(); + let mut first_iter = true; for engine in engines { + if config.engine_list_separator.unwrap() && !first_iter { + html.push_str(" · "); + } + first_iter = false; + let raw_engine_id = &engine.id(); + let engine_id = if config.engine_list_separator.unwrap() { + raw_engine_id.replace('_', " ") + } else { + raw_engine_id.to_string() + }; html.push_str(&format!( r#"{engine}"#, - engine = encode_text(&engine.id()) + engine = encode_text(&engine_id) )); } format!(r#"
{html}
"#) } -fn render_search_result(result: &engines::SearchResult) -> String { +fn render_search_result(result: &engines::SearchResult, config: &Config) -> String { format!( r#"
@@ -71,11 +82,12 @@ fn render_search_result(result: &engines::SearchResult) -> String { url = encode_text(&result.url), title = encode_text(&result.title), desc = encode_text(&result.description), - engines_html = render_engine_list(&result.engines.iter().copied().collect::>()) + engines_html = + render_engine_list(&result.engines.iter().copied().collect::>(), config) ) } -fn render_featured_snippet(featured_snippet: &engines::FeaturedSnippet) -> String { +fn render_featured_snippet(featured_snippet: &engines::FeaturedSnippet, config: &Config) -> String { format!( r#"