metasearch/src/engines/answer/thesaurus.rs
2024-04-15 19:19:09 -05:00

229 lines
6.7 KiB
Rust

use eyre::eyre;
use scraper::{Html, Selector};
use serde::Deserialize;
use tracing::error;
use url::Url;
use crate::engines::{EngineResponse, RequestResponse, CLIENT};
use super::regex;
pub fn request(query: &str) -> RequestResponse {
let re = regex!(r"^synonyms for\s+(\w+)$");
let query = match re.captures(query) {
Some(caps) => caps.get(1).unwrap().as_str(),
None => return RequestResponse::None,
}
.to_lowercase();
CLIENT
.get(
Url::parse(
format!(
"https://www.thesaurus.com/browse/{}",
urlencoding::encode(&query.to_lowercase())
)
.as_str(),
)
.unwrap(),
)
.into()
}
#[derive(Debug, Deserialize)]
pub struct ThesaurusResponse {
/// Example: `silly`
pub word: String,
pub items: Vec<ThesaurusItem>,
}
#[derive(Debug, Deserialize)]
pub struct ThesaurusItem {
/// Example `adjective`
pub part_of_speech: String,
/// Example: `absurd, giddy, foolish`
pub as_in: String,
pub strongest_matches: Vec<String>,
pub strong_matches: Vec<String>,
pub weak_matches: Vec<String>,
}
pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
let response = parse_thesaurus_com_response(body)?;
if response.items.is_empty() {
return Ok(EngineResponse::new());
}
let rendered_html = render_thesaurus_html(response);
Ok(EngineResponse::answer_html(rendered_html))
}
fn parse_thesaurus_com_response(body: &str) -> eyre::Result<ThesaurusResponse> {
let dom = Html::parse_document(body);
let word = dom
.select(&Selector::parse("h1").unwrap())
.next()
.ok_or_else(|| eyre!("No title found"))?
.text()
.collect::<String>();
let card_sel = Selector::parse("[data-type='synonym-and-antonym-card']").unwrap();
let card_els = dom.select(&card_sel);
let mut items = Vec::<ThesaurusItem>::new();
for synonym_and_antonym_card_el in card_els {
items.push(parse_thesaurus_com_item(synonym_and_antonym_card_el)?);
}
Ok(ThesaurusResponse { word, items })
}
fn parse_thesaurus_com_item(
synonym_and_antonym_card_el: scraper::ElementRef,
) -> eyre::Result<ThesaurusItem> {
let adjective_as_in_words = synonym_and_antonym_card_el
.select(&Selector::parse("div:first-child > p").unwrap())
.next()
.ok_or_else(|| eyre!("No adjective as in words found"))?
.text()
.collect::<String>();
let (part_of_speech, as_in) = adjective_as_in_words
.split_once(" as in ")
.ok_or_else(|| eyre!("No 'as in' found"))?;
let part_of_speech = part_of_speech.trim().to_owned();
let as_in = as_in.trim().to_owned();
let matches_container_el = synonym_and_antonym_card_el
.select(&Selector::parse("div:nth-child(2) > div:nth-child(2)").unwrap())
.next()
.ok_or_else(|| eyre!("No matches container found"))?;
let mut strongest_matches = Vec::<String>::new();
let mut strong_matches = Vec::<String>::new();
let mut weak_matches = Vec::<String>::new();
for match_el in matches_container_el.select(&Selector::parse("div").unwrap()) {
let match_type = match_el
.select(&Selector::parse("p").unwrap())
.next()
.ok_or_else(|| eyre!("No match type found"))?
.text()
.collect::<String>();
let match_type = match_type
.split(' ')
.next()
.ok_or_else(|| eyre!("No match type found"))?;
let matches = match_el
.select(&Selector::parse("a").unwrap())
.map(|el| el.text().collect::<String>())
.collect::<Vec<String>>();
match match_type {
"Strongest" => {
strongest_matches = matches;
}
"Strong" => {
strong_matches = matches;
}
"Weak" => {
weak_matches = matches;
}
_ => {
error!("Unknown thesaurus match type: {match_type}");
}
}
}
Ok(ThesaurusItem {
part_of_speech,
as_in,
strongest_matches,
strong_matches,
weak_matches,
})
}
fn render_thesaurus_html(ThesaurusResponse { word, items }: ThesaurusResponse) -> String {
let mut html = String::new();
html.push_str(&format!(
"<h2 class=\"answer-thesaurus-word\"><a href=\"https://www.thesaurus.com/browse/{word}\">{word}</a></h2>",
word = html_escape::encode_safe(&word)
));
html.push_str("<div class=\"answer-thesaurus-items\">");
for item in items {
html.push_str("<div class=\"answer-thesaurus-item\">");
html.push_str(&render_thesaurus_item_html(item));
html.push_str("</div>");
}
html.push_str("</div>");
html
}
fn render_thesaurus_item_html(
ThesaurusItem {
part_of_speech,
as_in,
strongest_matches,
strong_matches,
weak_matches,
}: ThesaurusItem,
) -> String {
let mut html = String::new();
html.push_str(&format!(
"<span class=\"answer-thesaurus-word-description\"><span class=\"answer-thesaurus-part-of-speech\">{part_of_speech}</span>, as in <span class=\"answer-thesaurus-as-in\">{as_in}</span></span>",
part_of_speech = html_escape::encode_safe(&part_of_speech.to_lowercase()),
as_in = html_escape::encode_safe(&as_in)
));
let render_matches = |matches: Vec<String>, strength: &str| {
if matches.is_empty() {
return String::new();
}
let mut html = String::new();
html.push_str(&format!(
"<div class=\"answer-thesaurus-{strength_id}\">",
strength_id = html_escape::encode_safe(&strength.to_lowercase().replace(' ', "-"))
));
html.push_str(&format!(
"<h3 class=\"answer-thesaurus-category-title\">{strength} {match_or_matches}</h3>",
strength = html_escape::encode_safe(&strength),
match_or_matches = if matches.len() == 1 {
"match"
} else {
"matches"
}
));
html.push_str("<ul class=\"answer-thesaurus-list\">");
for synonym in matches {
html.push_str(&format!(
"<li><a href=\"https://www.thesaurus.com/browse/{synonym}\">{synonym}</a></li>",
synonym = html_escape::encode_safe(&synonym)
));
}
html.push_str("</ul>");
html.push_str("</div>");
html
};
html.push_str(&render_matches(strongest_matches, "Strongest"));
html.push_str(&render_matches(strong_matches, "Strong"));
html.push_str(&render_matches(weak_matches, "Weak"));
html
}