add thesaurus

This commit is contained in:
mat 2024-04-04 04:51:19 +00:00
parent 017cdd1400
commit e1dadc2ebb
13 changed files with 289 additions and 24 deletions

View File

@ -1,6 +1,7 @@
pub mod calc; pub mod calc;
pub mod dictionary; pub mod dictionary;
pub mod ip; pub mod ip;
pub mod thesaurus;
pub mod timezone; pub mod timezone;
pub mod useragent; pub mod useragent;
pub mod wikipedia; pub mod wikipedia;

View File

@ -17,7 +17,7 @@ pub fn request(query: &str) -> EngineResponse {
EngineResponse::answer_html(format!( EngineResponse::answer_html(format!(
r#"<p class="answer-query">{query} =</p> r#"<p class="answer-query">{query} =</p>
<h3><b>{result_html}</b></h3>"#, <h3><b>{result_html}</b></h3>"#,
query = html_escape::encode_text(&query), query = html_escape::encode_safe(&query),
)) ))
} }
@ -69,11 +69,11 @@ fn evaluate(query: &str, html: bool) -> Option<String> {
_ => "", _ => "",
}; };
if class.is_empty() { if class.is_empty() {
result_html.push_str(&html_escape::encode_text(&span.text)); result_html.push_str(&html_escape::encode_safe(&span.text));
} else { } else {
result_html.push_str(&format!( result_html.push_str(&format!(
r#"<span class="{class}">{text}</span>"#, r#"<span class="{class}">{text}</span>"#,
text = html_escape::encode_text(&span.text) text = html_escape::encode_safe(&span.text)
)); ));
} }
} }

View File

@ -73,8 +73,8 @@ pub fn parse_response(HttpResponse { res, body }: &HttpResponse) -> eyre::Result
html.push_str(&format!( html.push_str(&format!(
"<h2 class=\"answer-dictionary-word\"><a href=\"https://en.wiktionary.org/wiki/{mediawiki_key}\">{word}</a></h2>", "<h2 class=\"answer-dictionary-word\"><a href=\"https://en.wiktionary.org/wiki/{mediawiki_key}\">{word}</a></h2>",
mediawiki_key = html_escape::encode_text(mediawiki_key), mediawiki_key = html_escape::encode_safe(mediawiki_key),
word = html_escape::encode_text(&word), word = html_escape::encode_safe(&word),
)); ));
let mut cleaner = ammonia::Builder::default(); let mut cleaner = ammonia::Builder::default();
@ -87,7 +87,7 @@ pub fn parse_response(HttpResponse { res, body }: &HttpResponse) -> eyre::Result
for entry in entries { for entry in entries {
html.push_str(&format!( html.push_str(&format!(
"<span class=\"answer-dictionary-part-of-speech\">{part_of_speech}</span>", "<span class=\"answer-dictionary-part-of-speech\">{part_of_speech}</span>",
part_of_speech = html_escape::encode_text(&entry.part_of_speech.to_lowercase()) part_of_speech = html_escape::encode_safe(&entry.part_of_speech.to_lowercase())
)); ));
html.push_str("<ol>"); html.push_str("<ol>");

View File

@ -11,6 +11,6 @@ pub fn request(query: &SearchQuery) -> EngineResponse {
EngineResponse::answer_html(format!( EngineResponse::answer_html(format!(
r#"<h3><b>{ip}</b></h3>"#, r#"<h3><b>{ip}</b></h3>"#,
ip = html_escape::encode_text(ip) ip = html_escape::encode_safe(ip)
)) ))
} }

View File

@ -0,0 +1,222 @@
use eyre::eyre;
use scraper::{Html, Selector};
use serde::Deserialize;
use url::Url;
use crate::engines::{EngineResponse, RequestResponse, CLIENT};
use super::regex;
/// Builds the thesaurus.com lookup for queries of the form `synonyms for <word>`.
///
/// Returns [`RequestResponse::None`] when the query does not match that
/// pattern. Otherwise the captured word is lowercased, percent-encoded and
/// requested from `https://www.thesaurus.com/browse/<word>`.
pub fn request(query: &str) -> RequestResponse {
    let re = regex!(r"^synonyms for\s+(\w+)$");
    let word = match re.captures(query) {
        Some(caps) => caps
            .get(1)
            .expect("group 1 always participates when the pattern matches")
            .as_str()
            // Lowercase once here; the URL below reuses this value as-is.
            .to_lowercase(),
        None => return RequestResponse::None,
    };

    let url = format!(
        "https://www.thesaurus.com/browse/{}",
        urlencoding::encode(&word)
    );

    CLIENT
        .get(Url::parse(&url).expect("fixed base plus a percent-encoded segment is a valid URL"))
        .into()
}
/// Data extracted from a thesaurus.com result page for one word.
#[derive(Debug, Deserialize)]
pub struct ThesaurusResponse {
/// The looked-up word, taken from the text of the page's first `h1`.
///
/// Example: `silly`
pub word: String,
/// One entry per `synonym-and-antonym-card` element found on the page,
/// in document order.
pub items: Vec<ThesaurusItem>,
}
/// One sense of the word, as parsed from a single synonym-and-antonym card.
#[derive(Debug, Deserialize)]
pub struct ThesaurusItem {
/// The text before ` as in ` in the card heading, trimmed.
///
/// Example: `adjective`
pub part_of_speech: String,
/// The text after ` as in ` in the card heading, trimmed.
///
/// Example: `absurd, giddy, foolish`
pub as_in: String,
/// Link texts from the group whose label starts with `Strongest`.
/// Empty when the card has no such group.
pub strongest_matches: Vec<String>,
/// Link texts from the group whose label starts with `Strong`.
/// Empty when the card has no such group.
pub strong_matches: Vec<String>,
/// Link texts from the group whose label starts with `Weak`.
/// Empty when the card has no such group.
pub weak_matches: Vec<String>,
}
/// Converts the HTML body of a thesaurus.com page into an HTML answer.
///
/// # Errors
///
/// Returns whatever error `parse_thesaurus_com_response` reports when the
/// expected page elements are missing.
pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
    parse_thesaurus_com_response(body)
        .map(|parsed| EngineResponse::answer_html(render_thesaurus_html(parsed)))
}
fn parse_thesaurus_com_response(body: &str) -> eyre::Result<ThesaurusResponse> {
let dom = Html::parse_document(body);
let word = dom
.select(&Selector::parse("h1").unwrap())
.next()
.ok_or_else(|| eyre!("No title found"))?
.text()
.collect::<String>();
let card_sel = Selector::parse("[data-type='synonym-and-antonym-card']").unwrap();
let card_els = dom.select(&card_sel);
let mut items = Vec::<ThesaurusItem>::new();
for synonym_and_antonym_card_el in card_els {
items.push(parse_thesaurus_com_item(synonym_and_antonym_card_el)?);
}
Ok(ThesaurusResponse { word, items })
}
/// Parses one synonym-and-antonym card into a [`ThesaurusItem`].
///
/// The card heading is expected to read `<part of speech> as in <words>`.
/// The match groups are the `div`s inside the card's second child's second
/// child; each group is labelled by a `p` whose first word is `Strongest`,
/// `Strong` or `Weak`, and its synonyms are the texts of its `a` elements.
/// Groups with any other label are reported on stderr and skipped.
///
/// # Errors
///
/// Fails when the heading paragraph is missing, when it does not contain
/// ` as in `, when the matches container is missing, or when a group `div`
/// has no `p` label.
fn parse_thesaurus_com_item(
    synonym_and_antonym_card_el: scraper::ElementRef,
) -> eyre::Result<ThesaurusItem> {
    let heading_sel = Selector::parse("div:first-child > p").unwrap();
    let adjective_as_in_words = synonym_and_antonym_card_el
        .select(&heading_sel)
        .next()
        .ok_or_else(|| eyre!("No adjective as in words found"))?
        .text()
        .collect::<String>();
    let (part_of_speech, as_in) = adjective_as_in_words
        .split_once(" as in ")
        .ok_or_else(|| eyre!("No 'as in' found"))?;
    let part_of_speech = part_of_speech.trim().to_owned();
    let as_in = as_in.trim().to_owned();

    let container_sel = Selector::parse("div:nth-child(2) > div:nth-child(2)").unwrap();
    let matches_container_el = synonym_and_antonym_card_el
        .select(&container_sel)
        .next()
        .ok_or_else(|| eyre!("No matches container found"))?;

    // Compile the per-group selectors once instead of on every iteration.
    let group_sel = Selector::parse("div").unwrap();
    let label_sel = Selector::parse("p").unwrap();
    let link_sel = Selector::parse("a").unwrap();

    let mut strongest_matches = Vec::<String>::new();
    let mut strong_matches = Vec::<String>::new();
    let mut weak_matches = Vec::<String>::new();

    for match_el in matches_container_el.select(&group_sel) {
        let label = match_el
            .select(&label_sel)
            .next()
            .ok_or_else(|| eyre!("No match type found"))?
            .text()
            .collect::<String>();
        // Only the first word of the label identifies the group. `split`
        // always yields at least one piece, so the default is never used.
        let match_type = label.split(' ').next().unwrap_or_default();

        let matches = match_el
            .select(&link_sel)
            .map(|el| el.text().collect::<String>())
            .collect::<Vec<String>>();

        match match_type {
            "Strongest" => strongest_matches = matches,
            "Strong" => strong_matches = matches,
            "Weak" => weak_matches = matches,
            _ => eprintln!("Unknown thesaurus match type: {match_type}"),
        }
    }

    Ok(ThesaurusItem {
        part_of_speech,
        as_in,
        strongest_matches,
        strong_matches,
        weak_matches,
    })
}
fn render_thesaurus_html(ThesaurusResponse { word, items }: ThesaurusResponse) -> String {
let mut html = String::new();
html.push_str(&format!(
"<h2 class=\"answer-thesaurus-word\"><a href=\"https://www.thesaurus.com/browse/{word}\">{word}</a></h2>",
word = html_escape::encode_safe(&word)
));
html.push_str("<div class=\"answer-thesaurus-items\">");
for item in items {
html.push_str("<div class=\"answer-thesaurus-item\">");
html.push_str(&render_thesaurus_item_html(item));
html.push_str("</div>");
}
html.push_str("</div>");
html
}
fn render_thesaurus_item_html(
ThesaurusItem {
part_of_speech,
as_in,
strongest_matches,
strong_matches,
weak_matches,
}: ThesaurusItem,
) -> String {
let mut html = String::new();
html.push_str(&format!(
"<span class=\"answer-thesaurus-word-description\"><span class=\"answer-thesaurus-part-of-speech\">{part_of_speech}</span>, as in <span class=\"answer-thesaurus-as-in\">{as_in}</span></span>",
part_of_speech = html_escape::encode_safe(&part_of_speech.to_lowercase()),
as_in = html_escape::encode_safe(&as_in)
));
let render_matches = |matches: Vec<String>, strength: &str| {
if matches.is_empty() {
return String::new();
}
let mut html = String::new();
html.push_str(&format!(
"<div class=\"answer-thesaurus-{strength_id}\">",
strength_id = html_escape::encode_safe(&strength.to_lowercase().replace(' ', "-"))
));
html.push_str(&format!(
"<h3 class=\"answer-thesaurus-category-title\">{strength} {match_or_matches}</h3>",
strength = html_escape::encode_safe(&strength),
match_or_matches = if matches.len() == 1 {
"match"
} else {
"matches"
}
));
html.push_str("<ul class=\"answer-thesaurus-list\">");
for synonym in matches {
html.push_str(&format!(
"<li><a href=\"https://www.thesaurus.com/browse/{synonym}\">{synonym}</a></li>",
synonym = html_escape::encode_safe(&synonym)
));
}
html.push_str("</ul>");
html.push_str("</div>");
html
};
html.push_str(&render_matches(strongest_matches, "Strongest"));
html.push_str(&render_matches(strong_matches, "Strong"));
html.push_str(&render_matches(weak_matches, "Weak"));
html
}

View File

@ -11,9 +11,9 @@ pub fn request(query: &str) -> EngineResponse {
Some(TimeResponse::Current { time, timezone }) => EngineResponse::answer_html(format!( Some(TimeResponse::Current { time, timezone }) => EngineResponse::answer_html(format!(
r#"<p class="answer-query">Current time in {timezone}</p> r#"<p class="answer-query">Current time in {timezone}</p>
<h3><b>{time}</b> <span class="answer-comment">({date})</span></h3>"#, <h3><b>{time}</b> <span class="answer-comment">({date})</span></h3>"#,
time = html_escape::encode_text(&time.format("%-I:%M %P").to_string()), time = html_escape::encode_safe(&time.format("%-I:%M %P").to_string()),
date = html_escape::encode_text(&time.format("%B %-d").to_string()), date = html_escape::encode_safe(&time.format("%B %-d").to_string()),
timezone = html_escape::encode_text(&timezone_to_string(timezone)), timezone = html_escape::encode_safe(&timezone_to_string(timezone)),
)), )),
Some(TimeResponse::Conversion { Some(TimeResponse::Conversion {
source_timezone, source_timezone,
@ -25,11 +25,11 @@ pub fn request(query: &str) -> EngineResponse {
}) => EngineResponse::answer_html(format!( }) => EngineResponse::answer_html(format!(
r#"<p class="answer-query">{source_time} {source_timezone} to {target_timezone}</p> r#"<p class="answer-query">{source_time} {source_timezone} to {target_timezone}</p>
<h3><b>{target_time}</b> <span class="answer-comment">{target_timezone} ({delta})</span></h3>"#, <h3><b>{target_time}</b> <span class="answer-comment">{target_timezone} ({delta})</span></h3>"#,
source_time = html_escape::encode_text(&source_time.format("%-I:%M %P").to_string()), source_time = html_escape::encode_safe(&source_time.format("%-I:%M %P").to_string()),
target_time = html_escape::encode_text(&target_time.format("%-I:%M %P").to_string()), target_time = html_escape::encode_safe(&target_time.format("%-I:%M %P").to_string()),
source_timezone = html_escape::encode_text(&timezone_to_string(source_timezone)), source_timezone = html_escape::encode_safe(&timezone_to_string(source_timezone)),
target_timezone = html_escape::encode_text(&timezone_to_string(target_timezone)), target_timezone = html_escape::encode_safe(&timezone_to_string(target_timezone)),
delta = html_escape::encode_text(&{ delta = html_escape::encode_safe(&{
let delta_minutes = (target_offset - source_offset).num_minutes(); let delta_minutes = (target_offset - source_offset).num_minutes();
if delta_minutes % 60 == 0 { if delta_minutes % 60 == 0 {
format!("{:+}", delta_minutes / 60) format!("{:+}", delta_minutes / 60)

View File

@ -14,7 +14,7 @@ pub fn request(query: &SearchQuery) -> EngineResponse {
EngineResponse::answer_html(if let Some(user_agent) = user_agent { EngineResponse::answer_html(if let Some(user_agent) = user_agent {
format!( format!(
"<h3><b>{user_agent}</b></h3>", "<h3><b>{user_agent}</b></h3>",
user_agent = html_escape::encode_text(user_agent) user_agent = html_escape::encode_safe(user_agent)
) )
} else { } else {
"You don't have a user agent".to_string() "You don't have a user agent".to_string()

View File

@ -91,7 +91,7 @@ pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
Ok(EngineResponse::infobox_html(format!( Ok(EngineResponse::infobox_html(format!(
r#"<a href="{page_url}"><h2>{title}</h2></a><p>{extract}</p>"#, r#"<a href="{page_url}"><h2>{title}</h2></a><p>{extract}</p>"#,
page_url = html_escape::encode_quoted_attribute(&page_url), page_url = html_escape::encode_quoted_attribute(&page_url),
title = html_escape::encode_text(title), title = html_escape::encode_safe(title),
extract = html_escape::encode_text(&extract), extract = html_escape::encode_safe(&extract),
))) )))
} }

View File

@ -34,6 +34,7 @@ engines! {
Calc = "calc", Calc = "calc",
Wikipedia = "wikipedia", Wikipedia = "wikipedia",
Dictionary = "dictionary", Dictionary = "dictionary",
Thesaurus = "thesaurus",
Timezone = "timezone", Timezone = "timezone",
// post-search // post-search
StackExchange = "stackexchange", StackExchange = "stackexchange",
@ -61,6 +62,7 @@ engine_requests! {
Calc => answer::calc::request, None, Calc => answer::calc::request, None,
Wikipedia => answer::wikipedia::request, parse_response, Wikipedia => answer::wikipedia::request, parse_response,
Dictionary => answer::dictionary::request, parse_response, Dictionary => answer::dictionary::request, parse_response,
Thesaurus => answer::thesaurus::request, parse_response,
Timezone => answer::timezone::request, None, Timezone => answer::timezone::request, None,
} }

View File

@ -57,14 +57,14 @@ pub fn parse_response(HttpResponse { res, body }: &HttpResponse) -> Option<Strin
format!( format!(
r#"<h2>{category} <a href="{url}">{title}</a> <span class="infobox-docs_rs-version">{version}</span></h2>"#, r#"<h2>{category} <a href="{url}">{title}</a> <span class="infobox-docs_rs-version">{version}</span></h2>"#,
url = html_escape::encode_quoted_attribute(&url.to_string()), url = html_escape::encode_quoted_attribute(&url.to_string()),
title = html_escape::encode_text(&title), title = html_escape::encode_safe(&title),
version = html_escape::encode_text(&version), version = html_escape::encode_safe(&version),
) )
} else { } else {
format!( format!(
r#"<h2>{category} <a href="{url}">{title}</a></h2>"#, r#"<h2>{category} <a href="{url}">{title}</a></h2>"#,
url = html_escape::encode_quoted_attribute(&url.to_string()), url = html_escape::encode_quoted_attribute(&url.to_string()),
title = html_escape::encode_text(&title), title = html_escape::encode_safe(&title),
) )
}; };

View File

@ -72,6 +72,6 @@ pub fn parse_response(body: &str) -> Option<String> {
r#"<a href="{url}"><h1>{title}</h1></a> r#"<a href="{url}"><h1>{title}</h1></a>
<div class="infobox-github-readme">{readme_html}</div>"#, <div class="infobox-github-readme">{readme_html}</div>"#,
url = html_escape::encode_quoted_attribute(&url), url = html_escape::encode_quoted_attribute(&url),
title = html_escape::encode_text(&title), title = html_escape::encode_safe(&title),
)) ))
} }

View File

@ -59,6 +59,6 @@ pub fn parse_response(body: &str) -> Option<String> {
r#"<a href="{url}"><h2>{title}</h2></a> r#"<a href="{url}"><h2>{title}</h2></a>
<div class="infobox-stackexchange-answer">{answer_html}</div>"#, <div class="infobox-stackexchange-answer">{answer_html}</div>"#,
url = html_escape::encode_quoted_attribute(&url.to_string()), url = html_escape::encode_quoted_attribute(&url.to_string()),
title = html_escape::encode_text(&title), title = html_escape::encode_safe(&title),
)) ))
} }

View File

@ -238,7 +238,8 @@ h1 {
color: var(--syntax-special); color: var(--syntax-special);
} }
.answer-dictionary-word { .answer-dictionary-word,
.answer-thesaurus-word {
margin-top: 0; margin-top: 0;
} }
.answer-dictionary-part-of-speech { .answer-dictionary-part-of-speech {
@ -249,6 +250,45 @@ h1 {
margin-bottom: 0.5em; margin-bottom: 0.5em;
} }
/* thesaurus answer */
/* Draw a divider under every item except the last one. */
.answer-thesaurus-item:not(:last-child) {
border-bottom: 1px solid var(--bg-4);
margin-bottom: 1rem;
padding-bottom: 1rem;
}
/* The "<part of speech>, as in <words>" line above each item's lists. */
.answer-thesaurus-word-description {
font-style: italic;
opacity: 0.8;
}
.answer-thesaurus-part-of-speech {
font-weight: bold;
}
.answer-thesaurus-as-in {
font-style: italic;
}
h3.answer-thesaurus-category-title {
margin-top: 0.5rem;
}
/* Progressively fade the groups as match strength decreases. */
.answer-thesaurus-strongest {
opacity: 1;
}
.answer-thesaurus-strong {
opacity: 0.8;
}
.answer-thesaurus-weak {
opacity: 0.6;
}
/* Lay synonyms out as a wrapping row of links without bullets. */
.answer-thesaurus-list {
margin: 0;
padding: 0;
display: flex;
flex-wrap: wrap;
gap: 0.8em;
list-style-type: none;
}
.answer-thesaurus-list a {
text-decoration: underline;
}
/* infobox */ /* infobox */
.infobox { .infobox {
margin-bottom: 1rem; margin-bottom: 1rem;