From e1dadc2ebb53e2b12c19b6405f34ac3cec44fddd Mon Sep 17 00:00:00 2001 From: mat Date: Thu, 4 Apr 2024 04:51:19 +0000 Subject: [PATCH] add thesaurus --- src/engines/answer.rs | 1 + src/engines/answer/calc.rs | 6 +- src/engines/answer/dictionary.rs | 6 +- src/engines/answer/ip.rs | 2 +- src/engines/answer/thesaurus.rs | 222 ++++++++++++++++++++++++ src/engines/answer/timezone.rs | 16 +- src/engines/answer/useragent.rs | 2 +- src/engines/answer/wikipedia.rs | 4 +- src/engines/mod.rs | 2 + src/engines/postsearch/docs_rs.rs | 6 +- src/engines/postsearch/github.rs | 2 +- src/engines/postsearch/stackexchange.rs | 2 +- src/web/assets/style.css | 42 ++++- 13 files changed, 289 insertions(+), 24 deletions(-) create mode 100644 src/engines/answer/thesaurus.rs diff --git a/src/engines/answer.rs b/src/engines/answer.rs index 501e092..b4b998b 100644 --- a/src/engines/answer.rs +++ b/src/engines/answer.rs @@ -1,6 +1,7 @@ pub mod calc; pub mod dictionary; pub mod ip; +pub mod thesaurus; pub mod timezone; pub mod useragent; pub mod wikipedia; diff --git a/src/engines/answer/calc.rs b/src/engines/answer/calc.rs index dbd9987..2709d27 100644 --- a/src/engines/answer/calc.rs +++ b/src/engines/answer/calc.rs @@ -17,7 +17,7 @@ pub fn request(query: &str) -> EngineResponse { EngineResponse::answer_html(format!( r#"

{query} =

{result_html}

"#, - query = html_escape::encode_text(&query), + query = html_escape::encode_safe(&query), )) } @@ -69,11 +69,11 @@ fn evaluate(query: &str, html: bool) -> Option { _ => "", }; if class.is_empty() { - result_html.push_str(&html_escape::encode_text(&span.text)); + result_html.push_str(&html_escape::encode_safe(&span.text)); } else { result_html.push_str(&format!( r#"{text}"#, - text = html_escape::encode_text(&span.text) + text = html_escape::encode_safe(&span.text) )); } } diff --git a/src/engines/answer/dictionary.rs b/src/engines/answer/dictionary.rs index 2d7f80f..8533b45 100644 --- a/src/engines/answer/dictionary.rs +++ b/src/engines/answer/dictionary.rs @@ -73,8 +73,8 @@ pub fn parse_response(HttpResponse { res, body }: &HttpResponse) -> eyre::Result html.push_str(&format!( "

{word}

", - mediawiki_key = html_escape::encode_text(mediawiki_key), - word = html_escape::encode_text(&word), + mediawiki_key = html_escape::encode_safe(mediawiki_key), + word = html_escape::encode_safe(&word), )); let mut cleaner = ammonia::Builder::default(); @@ -87,7 +87,7 @@ pub fn parse_response(HttpResponse { res, body }: &HttpResponse) -> eyre::Result for entry in entries { html.push_str(&format!( "{part_of_speech}", - part_of_speech = html_escape::encode_text(&entry.part_of_speech.to_lowercase()) + part_of_speech = html_escape::encode_safe(&entry.part_of_speech.to_lowercase()) )); html.push_str("
    "); diff --git a/src/engines/answer/ip.rs b/src/engines/answer/ip.rs index 2b56410..52d13f6 100644 --- a/src/engines/answer/ip.rs +++ b/src/engines/answer/ip.rs @@ -11,6 +11,6 @@ pub fn request(query: &SearchQuery) -> EngineResponse { EngineResponse::answer_html(format!( r#"

    {ip}

    "#, - ip = html_escape::encode_text(ip) + ip = html_escape::encode_safe(ip) )) } diff --git a/src/engines/answer/thesaurus.rs b/src/engines/answer/thesaurus.rs new file mode 100644 index 0000000..ad532aa --- /dev/null +++ b/src/engines/answer/thesaurus.rs @@ -0,0 +1,222 @@ +use eyre::eyre; +use scraper::{Html, Selector}; +use serde::Deserialize; +use url::Url; + +use crate::engines::{EngineResponse, RequestResponse, CLIENT}; + +use super::regex; + +pub fn request(query: &str) -> RequestResponse { + let re = regex!(r"^synonyms for\s+(\w+)$"); + let query = match re.captures(query) { + Some(caps) => caps.get(1).unwrap().as_str(), + None => return RequestResponse::None, + } + .to_lowercase(); + + CLIENT + .get( + Url::parse( + format!( + "https://www.thesaurus.com/browse/{}", + urlencoding::encode(&query.to_lowercase()) + ) + .as_str(), + ) + .unwrap(), + ) + .into() +} + +#[derive(Debug, Deserialize)] +pub struct ThesaurusResponse { + /// Example: `silly` + pub word: String, + pub items: Vec, +} + +#[derive(Debug, Deserialize)] +pub struct ThesaurusItem { + /// Example `adjective` + pub part_of_speech: String, + /// Example: `absurd, giddy, foolish` + pub as_in: String, + + pub strongest_matches: Vec, + pub strong_matches: Vec, + pub weak_matches: Vec, +} + +pub fn parse_response(body: &str) -> eyre::Result { + let response = parse_thesaurus_com_response(body)?; + let rendered_html = render_thesaurus_html(response); + + Ok(EngineResponse::answer_html(rendered_html)) +} + +fn parse_thesaurus_com_response(body: &str) -> eyre::Result { + let dom = Html::parse_document(body); + + let word = dom + .select(&Selector::parse("h1").unwrap()) + .next() + .ok_or_else(|| eyre!("No title found"))? + .text() + .collect::(); + + let card_sel = Selector::parse("[data-type='synonym-and-antonym-card']").unwrap(); + let card_els = dom.select(&card_sel); + + let mut items = Vec::::new(); + + for synonym_and_antonym_card_el in card_els { + items.push(parse_thesaurus_com_item(synonym_and_antonym_card_el)?); + } + + Ok(ThesaurusResponse { word, items }) +} + +fn parse_thesaurus_com_item( + synonym_and_antonym_card_el: scraper::ElementRef, +) -> eyre::Result { + let adjective_as_in_words = synonym_and_antonym_card_el + .select(&Selector::parse("div:first-child > p").unwrap()) + .next() + .ok_or_else(|| eyre!("No adjective as in words found"))? + .text() + .collect::(); + let (part_of_speech, as_in) = adjective_as_in_words + .split_once(" as in ") + .ok_or_else(|| eyre!("No 'as in' found"))?; + let part_of_speech = part_of_speech.trim().to_owned(); + let as_in = as_in.trim().to_owned(); + + let matches_container_el = synonym_and_antonym_card_el + .select(&Selector::parse("div:nth-child(2) > div:nth-child(2)").unwrap()) + .next() + .ok_or_else(|| eyre!("No matches container found"))?; + + let mut strongest_matches = Vec::::new(); + let mut strong_matches = Vec::::new(); + let mut weak_matches = Vec::::new(); + + for match_el in matches_container_el.select(&Selector::parse("div").unwrap()) { + let match_type = match_el + .select(&Selector::parse("p").unwrap()) + .next() + .ok_or_else(|| eyre!("No match type found"))? + .text() + .collect::(); + let match_type = match_type + .split(' ') + .next() + .ok_or_else(|| eyre!("No match type found"))?; + + let matches = match_el + .select(&Selector::parse("a").unwrap()) + .map(|el| el.text().collect::()) + .collect::>(); + + match match_type { + "Strongest" => { + strongest_matches = matches; + } + "Strong" => { + strong_matches = matches; + } + "Weak" => { + weak_matches = matches; + } + _ => { + eprintln!("Unknown thesaurus match type: {match_type}"); + } + } + } + + Ok(ThesaurusItem { + part_of_speech, + as_in, + strongest_matches, + strong_matches, + weak_matches, + }) +} + +fn render_thesaurus_html(ThesaurusResponse { word, items }: ThesaurusResponse) -> String { + let mut html = String::new(); + + html.push_str(&format!( + "

    {word}

    ", + word = html_escape::encode_safe(&word) + )); + + html.push_str("
    "); + for item in items { + html.push_str("
    "); + html.push_str(&render_thesaurus_item_html(item)); + html.push_str("
    "); + } + html.push_str("
    "); + + html +} + +fn render_thesaurus_item_html( + ThesaurusItem { + part_of_speech, + as_in, + strongest_matches, + strong_matches, + weak_matches, + }: ThesaurusItem, +) -> String { + let mut html = String::new(); + + html.push_str(&format!( + "{part_of_speech}, as in {as_in}", + part_of_speech = html_escape::encode_safe(&part_of_speech.to_lowercase()), + as_in = html_escape::encode_safe(&as_in) + )); + + let render_matches = |matches: Vec, strength: &str| { + if matches.is_empty() { + return String::new(); + } + + let mut html = String::new(); + + html.push_str(&format!( + "
    ", + strength_id = html_escape::encode_safe(&strength.to_lowercase().replace(' ', "-")) + )); + + html.push_str(&format!( + "

    {strength} {match_or_matches}

    ", + strength = html_escape::encode_safe(&strength), + match_or_matches = if matches.len() == 1 { + "match" + } else { + "matches" + } + )); + html.push_str("
      "); + for synonym in matches { + html.push_str(&format!( + "
    • {synonym}
    • ", + synonym = html_escape::encode_safe(&synonym) + )); + } + html.push_str("
    "); + + html.push_str("
    "); + + html + }; + + html.push_str(&render_matches(strongest_matches, "Strongest")); + html.push_str(&render_matches(strong_matches, "Strong")); + html.push_str(&render_matches(weak_matches, "Weak")); + + html +} diff --git a/src/engines/answer/timezone.rs b/src/engines/answer/timezone.rs index f46f7be..ea1d2a0 100644 --- a/src/engines/answer/timezone.rs +++ b/src/engines/answer/timezone.rs @@ -11,9 +11,9 @@ pub fn request(query: &str) -> EngineResponse { Some(TimeResponse::Current { time, timezone }) => EngineResponse::answer_html(format!( r#"

    Current time in {timezone}

    {time} ({date})

    "#, - time = html_escape::encode_text(&time.format("%-I:%M %P").to_string()), - date = html_escape::encode_text(&time.format("%B %-d").to_string()), - timezone = html_escape::encode_text(&timezone_to_string(timezone)), + time = html_escape::encode_safe(&time.format("%-I:%M %P").to_string()), + date = html_escape::encode_safe(&time.format("%B %-d").to_string()), + timezone = html_escape::encode_safe(&timezone_to_string(timezone)), )), Some(TimeResponse::Conversion { source_timezone, @@ -25,11 +25,11 @@ pub fn request(query: &str) -> EngineResponse { }) => EngineResponse::answer_html(format!( r#"

    {source_time} {source_timezone} to {target_timezone}

    {target_time} {target_timezone} ({delta})

    "#, - source_time = html_escape::encode_text(&source_time.format("%-I:%M %P").to_string()), - target_time = html_escape::encode_text(&target_time.format("%-I:%M %P").to_string()), - source_timezone = html_escape::encode_text(&timezone_to_string(source_timezone)), - target_timezone = html_escape::encode_text(&timezone_to_string(target_timezone)), - delta = html_escape::encode_text(&{ + source_time = html_escape::encode_safe(&source_time.format("%-I:%M %P").to_string()), + target_time = html_escape::encode_safe(&target_time.format("%-I:%M %P").to_string()), + source_timezone = html_escape::encode_safe(&timezone_to_string(source_timezone)), + target_timezone = html_escape::encode_safe(&timezone_to_string(target_timezone)), + delta = html_escape::encode_safe(&{ let delta_minutes = (target_offset - source_offset).num_minutes(); if delta_minutes % 60 == 0 { format!("{:+}", delta_minutes / 60) diff --git a/src/engines/answer/useragent.rs b/src/engines/answer/useragent.rs index 9d325ef..ee9418c 100644 --- a/src/engines/answer/useragent.rs +++ b/src/engines/answer/useragent.rs @@ -14,7 +14,7 @@ pub fn request(query: &SearchQuery) -> EngineResponse { EngineResponse::answer_html(if let Some(user_agent) = user_agent { format!( "

    {user_agent}

    ", - user_agent = html_escape::encode_text(user_agent) + user_agent = html_escape::encode_safe(user_agent) ) } else { "You don't have a user agent".to_string() diff --git a/src/engines/answer/wikipedia.rs b/src/engines/answer/wikipedia.rs index 3f8139f..a0ba7da 100644 --- a/src/engines/answer/wikipedia.rs +++ b/src/engines/answer/wikipedia.rs @@ -91,7 +91,7 @@ pub fn parse_response(body: &str) -> eyre::Result { Ok(EngineResponse::infobox_html(format!( r#"

    {title}

    {extract}

    "#, page_url = html_escape::encode_quoted_attribute(&page_url), - title = html_escape::encode_text(title), - extract = html_escape::encode_text(&extract), + title = html_escape::encode_safe(title), + extract = html_escape::encode_safe(&extract), ))) } diff --git a/src/engines/mod.rs b/src/engines/mod.rs index ac0a6ed..864a6ee 100644 --- a/src/engines/mod.rs +++ b/src/engines/mod.rs @@ -34,6 +34,7 @@ engines! { Calc = "calc", Wikipedia = "wikipedia", Dictionary = "dictionary", + Thesaurus = "thesaurus", Timezone = "timezone", // post-search StackExchange = "stackexchange", @@ -61,6 +62,7 @@ engine_requests! { Calc => answer::calc::request, None, Wikipedia => answer::wikipedia::request, parse_response, Dictionary => answer::dictionary::request, parse_response, + Thesaurus => answer::thesaurus::request, parse_response, Timezone => answer::timezone::request, None, } diff --git a/src/engines/postsearch/docs_rs.rs b/src/engines/postsearch/docs_rs.rs index 34d41f1..f9ce134 100644 --- a/src/engines/postsearch/docs_rs.rs +++ b/src/engines/postsearch/docs_rs.rs @@ -57,14 +57,14 @@ pub fn parse_response(HttpResponse { res, body }: &HttpResponse) -> Option{category} {title} {version}"#, url = html_escape::encode_quoted_attribute(&url.to_string()), - title = html_escape::encode_text(&title), - version = html_escape::encode_text(&version), + title = html_escape::encode_safe(&title), + version = html_escape::encode_safe(&version), ) } else { format!( r#"

    {category} {title}

    "#, url = html_escape::encode_quoted_attribute(&url.to_string()), - title = html_escape::encode_text(&title), + title = html_escape::encode_safe(&title), ) }; diff --git a/src/engines/postsearch/github.rs b/src/engines/postsearch/github.rs index 8f1e9c7..08d3ba5 100644 --- a/src/engines/postsearch/github.rs +++ b/src/engines/postsearch/github.rs @@ -72,6 +72,6 @@ pub fn parse_response(body: &str) -> Option { r#"

    {title}

    {readme_html}
    "#, url = html_escape::encode_quoted_attribute(&url), - title = html_escape::encode_text(&title), + title = html_escape::encode_safe(&title), )) } diff --git a/src/engines/postsearch/stackexchange.rs b/src/engines/postsearch/stackexchange.rs index 932dab0..6ed8655 100644 --- a/src/engines/postsearch/stackexchange.rs +++ b/src/engines/postsearch/stackexchange.rs @@ -59,6 +59,6 @@ pub fn parse_response(body: &str) -> Option { r#"

    {title}

    {answer_html}
    "#, url = html_escape::encode_quoted_attribute(&url.to_string()), - title = html_escape::encode_text(&title), + title = html_escape::encode_safe(&title), )) } diff --git a/src/web/assets/style.css b/src/web/assets/style.css index 40cddf8..2bdd5ba 100644 --- a/src/web/assets/style.css +++ b/src/web/assets/style.css @@ -238,7 +238,8 @@ h1 { color: var(--syntax-special); } -.answer-dictionary-word { +.answer-dictionary-word, +.answer-thesaurus-word { margin-top: 0; } .answer-dictionary-part-of-speech { @@ -249,6 +250,45 @@ h1 { margin-bottom: 0.5em; } +.answer-thesaurus-item:not(:last-child) { + border-bottom: 1px solid var(--bg-4); + margin-bottom: 1rem; + padding-bottom: 1rem; +} +.answer-thesaurus-word-description { + font-style: italic; + opacity: 0.8; +} +.answer-thesaurus-part-of-speech { + font-weight: bold; +} +.answer-thesaurus-as-in { + font-style: italic; +} +h3.answer-thesaurus-category-title { + margin-top: 0.5rem; +} +.answer-thesaurus-strongest { + opacity: 1; +} +.answer-thesaurus-strong { + opacity: 0.8; +} +.answer-thesaurus-weak { + opacity: 0.6; +} +.answer-thesaurus-list { + margin: 0; + padding: 0; + display: flex; + flex-wrap: wrap; + gap: 0.8em; + list-style-type: none; +} +.answer-thesaurus-list a { + text-decoration: underline; +} + /* infobox */ .infobox { margin-bottom: 1rem;