use std::collections::HashMap; use eyre::eyre; use serde::Deserialize; use url::Url; use crate::engines::{EngineResponse, HttpResponse, RequestResponse, CLIENT}; use super::regex; pub fn request(query: &str) -> RequestResponse { // if the query starts with "define " then use that, otherwise abort let re = regex!(r"^define\s+(\w+)$"); let query = match re.captures(query) { Some(caps) => caps.get(1).unwrap().as_str(), None => return RequestResponse::None, } .to_lowercase(); CLIENT .get( Url::parse( format!( "https://en.wiktionary.org/api/rest_v1/page/definition/{}", urlencoding::encode(&query) ) .as_str(), ) .unwrap(), ) .into() } #[derive(Debug, Deserialize)] pub struct WiktionaryResponse(pub HashMap>); #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub struct WiktionaryEntry { pub part_of_speech: String, pub language: String, pub definitions: Vec, } #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub struct WiktionaryDefinition { pub definition: String, #[serde(default)] pub examples: Vec, } pub fn parse_response(HttpResponse { res, body }: &HttpResponse) -> eyre::Result { let url = res.url(); let Ok(res) = serde_json::from_str::(body) else { return Ok(EngineResponse::new()); }; let mediawiki_key = url .path_segments() .ok_or_else(|| eyre!("url has no path segments"))? .last() .ok_or_else(|| eyre!("url has no last path segment"))?; let word = key_to_title(mediawiki_key); let mut html = String::new(); let Some(entries) = res.0.get("en") else { return Ok(EngineResponse::new()); }; html.push_str(&format!( "

{word}

", word = html_escape::encode_text(&word), )); let mut cleaner = ammonia::Builder::default(); cleaner .link_rel(None) .url_relative(ammonia::UrlRelative::RewriteWithBase( Url::parse("https://en.wiktionary.org").unwrap(), )); for entry in entries { html.push_str(&format!( "{part_of_speech}", part_of_speech = html_escape::encode_text(&entry.part_of_speech.to_lowercase()) )); html.push_str("
    "); let mut previous_definitions = Vec::::new(); for definition in &entry.definitions { if definition.definition.is_empty() { // wiktionary does this sometimes, for example https://en.wiktionary.org/api/rest_v1/page/definition/variance continue; } if previous_definitions .iter() .any(|d| d.contains(&definition.definition)) { // wiktionary will sometimes duplicate definitions, for example https://en.wiktionary.org/api/rest_v1/page/definition/google continue; } previous_definitions.push(definition.definition.clone()); html.push_str("
  1. "); let definition_html = cleaner .clean(&definition.definition.replace('“', "\"")) .to_string(); html.push_str(&format!("

    {definition_html}

    ")); if !definition.examples.is_empty() { for example in &definition.examples { let example_html = cleaner.clean(example).to_string(); html.push_str(&format!("
    {example_html}
    ")); } } html.push_str("
  2. "); } html.push_str("
"); } Ok(EngineResponse::answer_html(html)) } fn key_to_title(key: &str) -> String { // https://github.com/wikimedia/mediawiki-title // In general, the page title is converted to the mediawiki DB key format by // trimming spaces, replacing whitespace symbols to underscores and applying // wiki-specific capitalization rules. let title = key.trim().replace('_', " "); let mut c = title.chars(); match c.next() { None => String::new(), Some(f) => f.to_uppercase().chain(c).collect(), } }