From ef65e60f9f7af1c3a364943e60d0780a4b294d60 Mon Sep 17 00:00:00 2001 From: mat <27899617+mat-1@users.noreply.github.com> Date: Mon, 20 May 2024 04:08:49 -0500 Subject: [PATCH] Image search (#10) * add tab for image search * add basic implementation of image search * add proxy * fix google images regex breaking when the query has spaces * fix sizing of image elements while they're loading * add optional engines indicator to image search * add bing images * fix some parsing issues with bing images * fix bing titles --- Cargo.lock | 1 + Cargo.toml | 10 +- config-base.toml | 5 + src/config.rs | 118 +++++-- src/engines/macros.rs | 28 ++ src/engines/mod.rs | 423 +++++++++++------------ src/engines/postsearch/docs_rs.rs | 4 +- src/engines/postsearch/github.rs | 4 +- src/engines/postsearch/mdn.rs | 3 +- src/engines/postsearch/minecraft_wiki.rs | 8 +- src/engines/postsearch/stackexchange.rs | 4 +- src/engines/ranking.rs | 204 +++++++++++ src/engines/search/bing.rs | 89 ++++- src/engines/search/google.rs | 97 +++++- src/web/assets/style.css | 75 +++- src/web/image_proxy.rs | 73 ++++ src/web/mod.rs | 2 + src/web/search.rs | 183 ++++------ src/web/search/all.rs | 93 +++++ src/web/search/images.rs | 48 +++ 20 files changed, 1084 insertions(+), 388 deletions(-) create mode 100644 src/engines/ranking.rs create mode 100644 src/web/image_proxy.rs create mode 100644 src/web/search/all.rs create mode 100644 src/web/search/images.rs diff --git a/Cargo.lock b/Cargo.lock index d4702fa..a5ddd58 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1841,6 +1841,7 @@ version = "1.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" dependencies = [ + "indexmap", "itoa", "ryu", "serde", diff --git a/Cargo.toml b/Cargo.toml index 7008f32..1189bf4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,15 +28,11 @@ numbat = "1.11.0" once_cell = "1.19.0" rand = "0.8.5" regex = "1.10.3" -reqwest = { version = "0.11.26", default-features = false, features = [ - "rustls-tls", - "gzip", - "deflate", - "brotli", -] } +reqwest = { version = "0.11.26", default-features = false, features = ["rustls-tls", "gzip", "deflate", "brotli"] } scraper = "0.19.0" serde = { version = "1.0.197", features = ["derive"] } -serde_json = "1.0.114" +# preserve_order is needed for google images. yippee! 
+serde_json = { version = "1.0.114", features = ["preserve_order"] } tokio = { version = "1.36.0", features = ["rt", "macros"] } tokio-stream = "0.1.15" toml = { version = "0.8.12", default-features = false, features = ["parse"] } diff --git a/config-base.toml b/config-base.toml index e73c5ad..605922a 100644 --- a/config-base.toml +++ b/config-base.toml @@ -6,6 +6,11 @@ bind = "0.0.0.0:28019" show_engine_list_separator = false show_version_info = false +[image_search] +enabled = false +show_engines = true +proxy = { enabled = true, max_download_size = 10_000_000 } + [engines] google = { weight = 1.05 } bing = { weight = 1.0 } diff --git a/src/config.rs b/src/config.rs index 287f03d..ce8f5d8 100644 --- a/src/config.rs +++ b/src/config.rs @@ -12,6 +12,8 @@ pub struct Config { #[serde(default)] pub ui: UiConfig, #[serde(default)] + pub image_search: ImageSearchConfig, + #[serde(default)] pub engines: EnginesConfig, } @@ -23,12 +25,51 @@ pub struct UiConfig { pub show_version_info: Option, } +#[derive(Deserialize, Debug, Default)] +pub struct ImageSearchConfig { + pub enabled: Option, + pub show_engines: Option, + #[serde(default)] + pub proxy: ImageProxyConfig, +} + +#[derive(Deserialize, Debug, Default)] +pub struct ImageProxyConfig { + /// Whether we should proxy remote images through our server. This is mostly + /// a privacy feature. + pub enabled: Option, + /// The maximum size of an image that can be proxied. This is in bytes. + pub max_download_size: Option, +} + #[derive(Deserialize, Debug, Default)] pub struct EnginesConfig { #[serde(flatten)] pub map: HashMap, } +#[derive(Deserialize, Clone, Debug)] +#[serde(untagged)] +pub enum DefaultableEngineConfig { + Boolean(bool), + Full(FullEngineConfig), +} + +#[derive(Deserialize, Clone, Debug)] +pub struct FullEngineConfig { + #[serde(default = "fn_true")] + pub enabled: bool, + + /// The priority of this engine relative to the other engines. The default + /// is 1, and a value of 0 is treated as the default. + #[serde(default)] + pub weight: f64, + /// Per-engine configs. These are parsed at request time. + #[serde(flatten)] + #[serde(default)] + pub extra: toml::Table, +} + impl Config { pub fn read_or_create(config_path: &Path) -> eyre::Result { let base_config_str = include_str!("../config-base.toml"); @@ -50,20 +91,39 @@ impl Config { // use the default for something. 
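 // As a concrete (hypothetical) example of the layering: if config-base.toml
 // sets `proxy = { enabled = true, max_download_size = 10_000_000 }` and the
 // user's config only sets `[image_search.proxy] enabled = false`, the merged
 // config disables the proxy but keeps the base 10 MB download limit, since
 // each Option falls back to the base value unless the user config sets it.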
pub fn update(&mut self, new: Config) { self.bind = new.bind; - self.ui.show_engine_list_separator = new - .ui + self.ui.update(new.ui); + self.image_search.update(new.image_search); + self.engines.update(new.engines); + } +} + +impl UiConfig { + pub fn update(&mut self, new: UiConfig) { + self.show_engine_list_separator = new .show_engine_list_separator - .or(self.ui.show_engine_list_separator); - assert_ne!(self.ui.show_engine_list_separator, None); - self.ui.show_version_info = new.ui.show_version_info.or(self.ui.show_version_info); - assert_ne!(self.ui.show_version_info, None); - for (key, new) in new.engines.map { - if let Some(existing) = self.engines.map.get_mut(&key) { - existing.update(new); - } else { - self.engines.map.insert(key, new); - } - } + .or(self.show_engine_list_separator); + assert_ne!(self.show_engine_list_separator, None); + self.show_version_info = new.show_version_info.or(self.show_version_info); + assert_ne!(self.show_version_info, None); + } +} + +impl ImageSearchConfig { + pub fn update(&mut self, new: ImageSearchConfig) { + self.enabled = new.enabled.or(self.enabled); + assert_ne!(self.enabled, None); + self.show_engines = new.show_engines.or(self.show_engines); + assert_ne!(self.show_engines, None); + self.proxy.update(new.proxy); + } +} + +impl ImageProxyConfig { + pub fn update(&mut self, new: ImageProxyConfig) { + self.enabled = new.enabled.or(self.enabled); + assert_ne!(self.enabled, None); + self.max_download_size = new.max_download_size.or(self.max_download_size); + assert_ne!(self.max_download_size, None); } } @@ -91,13 +151,16 @@ impl EnginesConfig { None => &DEFAULT_ENABLED_FULL_ENGINE_CONFIG, } } -} -#[derive(Deserialize, Clone, Debug)] -#[serde(untagged)] -pub enum DefaultableEngineConfig { - Boolean(bool), - Full(FullEngineConfig), + pub fn update(&mut self, new: Self) { + for (key, new) in new.map { + if let Some(existing) = self.map.get_mut(&key) { + existing.update(new); + } else { + self.map.insert(key, new); + } + } + } } impl DefaultableEngineConfig { @@ -115,24 +178,9 @@ impl Default for DefaultableEngineConfig { } } -#[derive(Deserialize, Clone, Debug)] -pub struct FullEngineConfig { - #[serde(default = "default_true")] - pub enabled: bool, - - /// The priority of this engine relative to the other engines. The default - /// is 1, and a value of 0 is treated as the default. - #[serde(default)] - pub weight: f64, - /// Per-engine configs. These are parsed at request time. - #[serde(flatten)] - #[serde(default)] - pub extra: toml::Table, -} - // serde expects a function as the default, this just exists so "enabled" is // always true by default -fn default_true() -> bool { +fn fn_true() -> bool { true } diff --git a/src/engines/macros.rs b/src/engines/macros.rs index 2f7184d..6fbad1c 100644 --- a/src/engines/macros.rs +++ b/src/engines/macros.rs @@ -126,3 +126,31 @@ macro_rules! engine_postsearch_requests { } }; } + +#[macro_export] +macro_rules! engine_image_requests { + ($($engine:ident => $module:ident::$engine_id:ident::$request:ident, $parse_response:ident),* $(,)?) => { + impl Engine { + #[must_use] + pub fn request_images(&self, query: &SearchQuery) -> RequestResponse { + match self { + $( + Engine::$engine => $module::$engine_id::$request(query).into(), + )* + _ => RequestResponse::None, + } + } + + pub fn parse_images_response(&self, res: &HttpResponse) -> eyre::Result { + #[allow(clippy::useless_conversion)] + match self { + $( + Engine::$engine => $crate::engine_parse_response! 
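+                    // (illustration: with the `Google => search::google::request_images,
+                    // parse_images_response` mapping declared in mod.rs, the generated
+                    // arm ends up calling `search::google::parse_images_response`
+                    // on the response)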
{ res, $module::$engine_id::$parse_response } + .ok_or_else(|| eyre::eyre!("engine {self:?} can't parse images response"))?, + )* + _ => eyre::bail!("engine {self:?} can't parse response"), + } + } + } + }; +} diff --git a/src/engines/mod.rs b/src/engines/mod.rs index 7a7e123..83814f1 100644 --- a/src/engines/mod.rs +++ b/src/engines/mod.rs @@ -1,25 +1,26 @@ use std::{ collections::{BTreeSet, HashMap}, - fmt, + fmt::{self, Display}, net::IpAddr, ops::Deref, str::FromStr, sync::Arc, - time::Instant, + time::{Duration, Instant}, }; use futures::future::join_all; use maud::PreEscaped; use once_cell::sync::Lazy; -use reqwest::header::HeaderMap; +use reqwest::{header::HeaderMap, RequestBuilder}; use serde::{Deserialize, Deserializer}; use tokio::sync::mpsc; use tracing::{error, info}; mod macros; +mod ranking; use crate::{ - config::Config, engine_autocomplete_requests, engine_postsearch_requests, engine_requests, - engines, + config::Config, engine_autocomplete_requests, engine_image_requests, + engine_postsearch_requests, engine_requests, engines, }; pub mod answer; @@ -90,6 +91,11 @@ engine_postsearch_requests! { StackExchange => postsearch::stackexchange::request, parse_response, } +engine_image_requests! { + Google => search::google::request_images, parse_images_response, + Bing => search::bing::request_images, parse_images_response, +} + impl fmt::Display for Engine { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.id()) @@ -108,6 +114,7 @@ impl<'de> Deserialize<'de> for Engine { pub struct SearchQuery { pub query: String, + pub tab: SearchTab, pub request_headers: HashMap, pub ip: String, /// The config is part of the query so it's possible to make a query with a @@ -123,6 +130,31 @@ impl Deref for SearchQuery { } } +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] +pub enum SearchTab { + #[default] + All, + Images, +} +impl FromStr for SearchTab { + type Err = (); + fn from_str(s: &str) -> Result { + match s { + "all" => Ok(Self::All), + "images" => Ok(Self::Images), + _ => Err(()), + } + } +} +impl Display for SearchTab { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::All => write!(f, "all"), + Self::Images => write!(f, "images"), + } + } +} + pub enum RequestResponse { None, Http(reqwest::RequestBuilder), @@ -172,7 +204,7 @@ impl From for reqwest::Response { } } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct EngineSearchResult { pub url: String, pub title: String, @@ -194,6 +226,11 @@ pub struct EngineResponse { pub infobox_html: Option>, } +#[derive(Default)] +pub struct EngineImagesResponse { + pub image_results: Vec, +} + impl EngineResponse { #[must_use] pub fn new() -> Self { @@ -217,6 +254,22 @@ impl EngineResponse { } } +impl EngineImagesResponse { + #[must_use] + pub fn new() -> Self { + Self::default() + } +} + +#[derive(Debug, Clone)] +pub struct EngineImageResult { + pub image_url: String, + pub page_url: String, + pub title: String, + pub width: u64, + pub height: u64, +} + #[derive(Debug)] pub enum EngineProgressUpdate { Requesting, @@ -231,7 +284,7 @@ pub enum ProgressUpdateData { engine: Engine, update: EngineProgressUpdate, }, - Response(Response), + Response(ResponseForTab), PostSearchInfobox(Infobox), } @@ -251,17 +304,40 @@ impl ProgressUpdate { } } -#[tracing::instrument(fields(query = %query.query), skip(progress_tx))] -pub async fn search( +async fn make_request( + request: RequestBuilder, + engine: Engine, query: &SearchQuery, - progress_tx: mpsc::UnboundedSender, + 
send_engine_progress_update: impl Fn(Engine, EngineProgressUpdate), +) -> eyre::Result { + send_engine_progress_update(engine, EngineProgressUpdate::Requesting); + + let mut res = request.send().await?; + + send_engine_progress_update(engine, EngineProgressUpdate::Downloading); + + let mut body_bytes = Vec::new(); + while let Some(chunk) = res.chunk().await? { + body_bytes.extend_from_slice(&chunk); + } + let body = String::from_utf8_lossy(&body_bytes).to_string(); + + send_engine_progress_update(engine, EngineProgressUpdate::Parsing); + + let http_response = HttpResponse { + res, + body, + config: query.config.clone(), + }; + Ok(http_response) +} + +async fn make_requests( + query: &SearchQuery, + progress_tx: &mpsc::UnboundedSender, + start_time: Instant, + send_engine_progress_update: &impl Fn(Engine, EngineProgressUpdate), ) -> eyre::Result<()> { - let start_time = Instant::now(); - - info!("Doing search"); - - let progress_tx = &progress_tx; - let mut requests = Vec::new(); for &engine in Engine::all() { let engine_config = query.config.engines.get(engine); @@ -274,59 +350,18 @@ pub async fn search( let response = match request_response { RequestResponse::Http(request) => { - progress_tx.send(ProgressUpdate::new( - ProgressUpdateData::Engine { - engine, - update: EngineProgressUpdate::Requesting, - }, - start_time, - ))?; - - let mut res = request.send().await?; - - progress_tx.send(ProgressUpdate::new( - ProgressUpdateData::Engine { - engine, - update: EngineProgressUpdate::Downloading, - }, - start_time, - ))?; - - let mut body_bytes = Vec::new(); - while let Some(chunk) = res.chunk().await? { - body_bytes.extend_from_slice(&chunk); - } - let body = String::from_utf8_lossy(&body_bytes).to_string(); - - progress_tx.send(ProgressUpdate::new( - ProgressUpdateData::Engine { - engine, - update: EngineProgressUpdate::Parsing, - }, - start_time, - ))?; - - let http_response = HttpResponse { - res, - body, - config: query.config.clone(), - }; + let http_response = + make_request(request, engine, query, send_engine_progress_update).await?; let response = match engine.parse_response(&http_response) { Ok(response) => response, Err(e) => { - error!("parse error: {e}"); + error!("parse error for {engine}: {e}"); EngineResponse::new() } }; - progress_tx.send(ProgressUpdate::new( - ProgressUpdateData::Engine { - engine, - update: EngineProgressUpdate::Done, - }, - start_time, - ))?; + send_engine_progress_update(engine, EngineProgressUpdate::Done); response } @@ -347,12 +382,10 @@ pub async fn search( join_all(response_futures).await.into_iter().collect(); let responses = responses_result?; - let response = merge_engine_responses(query.config.clone(), responses); - + let response = ranking::merge_engine_responses(query.config.clone(), responses); let has_infobox = response.infobox.is_some(); - progress_tx.send(ProgressUpdate::new( - ProgressUpdateData::Response(response.clone()), + ProgressUpdateData::Response(ResponseForTab::All(response.clone())), start_time, ))?; @@ -420,6 +453,98 @@ pub async fn search( Ok(()) } +async fn make_image_requests( + query: &SearchQuery, + progress_tx: &mpsc::UnboundedSender, + start_time: Instant, + send_engine_progress_update: &impl Fn(Engine, EngineProgressUpdate), +) -> eyre::Result<()> { + let mut requests = Vec::new(); + for &engine in Engine::all() { + let engine_config = query.config.engines.get(engine); + if !engine_config.enabled { + continue; + } + + requests.push(async move { + let request_response = engine.request_images(query); + + let response = match 
request_response { + RequestResponse::Http(request) => { + let http_response = + make_request(request, engine, query, send_engine_progress_update).await?; + + let response = match engine.parse_images_response(&http_response) { + Ok(response) => response, + Err(e) => { + error!("parse error for {engine} (images): {e}"); + EngineImagesResponse::new() + } + }; + + send_engine_progress_update(engine, EngineProgressUpdate::Done); + + response + } + RequestResponse::Instant(_) => { + error!("unexpected instant response for image request"); + EngineImagesResponse::new() + } + RequestResponse::None => EngineImagesResponse::new(), + }; + + Ok((engine, response)) + }); + } + + let mut response_futures = Vec::new(); + for request in requests { + response_futures.push(request); + } + + let responses_result: eyre::Result> = + join_all(response_futures).await.into_iter().collect(); + let responses = responses_result?; + + let response = ranking::merge_images_responses(query.config.clone(), responses); + progress_tx.send(ProgressUpdate::new( + ProgressUpdateData::Response(ResponseForTab::Images(response.clone())), + start_time, + ))?; + + Ok(()) +} + +#[tracing::instrument(fields(query = %query.query), skip(progress_tx))] +pub async fn search( + query: &SearchQuery, + progress_tx: mpsc::UnboundedSender, +) -> eyre::Result<()> { + let start_time = Instant::now(); + + info!("Doing search"); + + let progress_tx = &progress_tx; + let send_engine_progress_update = |engine: Engine, update: EngineProgressUpdate| { + let _ = progress_tx.send(ProgressUpdate::new( + ProgressUpdateData::Engine { engine, update }, + start_time, + )); + }; + + match query.tab { + SearchTab::All => { + make_requests(query, progress_tx, start_time, &send_engine_progress_update).await? + } + SearchTab::Images => { + make_image_requests(query, progress_tx, start_time, &send_engine_progress_update) + .await? 
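+                // (reached via e.g. GET /search?q=ferris&tab=images; the `tab`
+                // query param is parsed into a SearchTab in src/web/search.rs)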
+ } + } + + Ok(()) +} + pub async fn autocomplete(config: &Config, query: &str) -> eyre::Result> { let mut requests = Vec::new(); for &engine in Engine::all() { @@ -452,7 +577,10 @@ pub async fn autocomplete(config: &Config, query: &str) -> eyre::Result = Lazy::new(|| { @@ -466,13 +594,14 @@ pub static CLIENT: Lazy = Lazy::new(|| { headers.insert("Accept-Language", "en-US,en;q=0.5".parse().unwrap()); headers }) + .timeout(Duration::from_secs(10)) .build() .unwrap() }); #[derive(Debug, Clone)] pub struct Response { - pub search_results: Vec, + pub search_results: Vec>, pub featured_snippet: Option, pub answer: Option, pub infobox: Option, @@ -480,10 +609,20 @@ pub struct Response { } #[derive(Debug, Clone)] -pub struct SearchResult { - pub url: String, - pub title: String, - pub description: String, +pub struct ImagesResponse { + pub image_results: Vec>, + pub config: Arc, +} + +#[derive(Debug, Clone)] +pub enum ResponseForTab { + All(Response), + Images(ImagesResponse), +} + +#[derive(Debug, Clone)] +pub struct SearchResult { + pub result: R, pub engines: BTreeSet, pub score: f64, } @@ -508,149 +647,7 @@ pub struct Infobox { pub engine: Engine, } -fn merge_engine_responses( - config: Arc, - responses: HashMap, -) -> Response { - let mut search_results: Vec = Vec::new(); - let mut featured_snippet: Option = None; - let mut answer: Option = None; - let mut infobox: Option = None; - - for (engine, response) in responses { - let engine_config = config.engines.get(engine); - - for (result_index, search_result) in response.search_results.into_iter().enumerate() { - // position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a - // score of 0.33, etc. - let base_result_score = 1. / (result_index + 1) as f64; - let result_score = base_result_score * engine_config.weight; - - if let Some(existing_result) = search_results - .iter_mut() - .find(|r| r.url == search_result.url) - { - // if the weight of this engine is higher than every other one then replace the - // title and description - if engine_config.weight - > existing_result - .engines - .iter() - .map(|&other_engine| { - let other_engine_config = config.engines.get(other_engine); - other_engine_config.weight - }) - .max_by(|a, b| a.partial_cmp(b).unwrap()) - .unwrap_or(0.) 
- { - existing_result.title = search_result.title; - existing_result.description = search_result.description; - } - - existing_result.engines.insert(engine); - existing_result.score += result_score; - } else { - search_results.push(SearchResult { - url: search_result.url, - title: search_result.title, - description: search_result.description, - engines: [engine].iter().copied().collect(), - score: result_score, - }); - } - } - - if let Some(engine_featured_snippet) = response.featured_snippet { - // if it has a higher weight than the current featured snippet - let featured_snippet_weight = featured_snippet.as_ref().map_or(0., |s| { - let other_engine_config = config.engines.get(s.engine); - other_engine_config.weight - }); - if engine_config.weight > featured_snippet_weight { - featured_snippet = Some(FeaturedSnippet { - url: engine_featured_snippet.url, - title: engine_featured_snippet.title, - description: engine_featured_snippet.description, - engine, - }); - } - } - - if let Some(engine_answer_html) = response.answer_html { - // if it has a higher weight than the current answer - let answer_weight = answer.as_ref().map_or(0., |s| { - let other_engine_config = config.engines.get(s.engine); - other_engine_config.weight - }); - if engine_config.weight > answer_weight { - answer = Some(Answer { - html: engine_answer_html, - engine, - }); - } - } - - if let Some(engine_infobox_html) = response.infobox_html { - // if it has a higher weight than the current infobox - let infobox_weight = infobox.as_ref().map_or(0., |s| { - let other_engine_config = config.engines.get(s.engine); - other_engine_config.weight - }); - if engine_config.weight > infobox_weight { - infobox = Some(Infobox { - html: engine_infobox_html, - engine, - }); - } - } - } - - search_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap()); - - Response { - search_results, - featured_snippet, - answer, - infobox, - config, - } -} - pub struct AutocompleteResult { pub query: String, pub score: f64, } - -fn merge_autocomplete_responses( - config: &Config, - responses: HashMap>, -) -> Vec { - let mut autocomplete_results: Vec = Vec::new(); - - for (engine, response) in responses { - let engine_config = config.engines.get(engine); - - for (result_index, autocomplete_result) in response.into_iter().enumerate() { - // position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a - // score of 0.33, etc. - let base_result_score = 1. 
/ (result_index + 1) as f64; - let result_score = base_result_score * engine_config.weight; - - if let Some(existing_result) = autocomplete_results - .iter_mut() - .find(|r| r.query == autocomplete_result) - { - existing_result.score += result_score; - } else { - autocomplete_results.push(AutocompleteResult { - query: autocomplete_result, - score: result_score, - }); - } - } - } - - autocomplete_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap()); - - autocomplete_results.into_iter().map(|r| r.query).collect() -} diff --git a/src/engines/postsearch/docs_rs.rs b/src/engines/postsearch/docs_rs.rs index 2c0090e..4c14c51 100644 --- a/src/engines/postsearch/docs_rs.rs +++ b/src/engines/postsearch/docs_rs.rs @@ -5,8 +5,8 @@ use crate::engines::{HttpResponse, Response, CLIENT}; pub fn request(response: &Response) -> Option { for search_result in response.search_results.iter().take(8) { - if search_result.url.starts_with("https://docs.rs/") { - return Some(CLIENT.get(search_result.url.as_str())); + if search_result.result.url.starts_with("https://docs.rs/") { + return Some(CLIENT.get(search_result.result.url.as_str())); } } diff --git a/src/engines/postsearch/github.rs b/src/engines/postsearch/github.rs index f2ef363..b5a2e86 100644 --- a/src/engines/postsearch/github.rs +++ b/src/engines/postsearch/github.rs @@ -6,8 +6,8 @@ use crate::engines::{answer::regex, Response, CLIENT}; pub fn request(response: &Response) -> Option { for search_result in response.search_results.iter().take(8) { - if regex!(r"^https:\/\/github\.com\/[\w-]+\/[\w.-]+$").is_match(&search_result.url) { - return Some(CLIENT.get(search_result.url.as_str())); + if regex!(r"^https:\/\/github\.com\/[\w-]+\/[\w.-]+$").is_match(&search_result.result.url) { + return Some(CLIENT.get(search_result.result.url.as_str())); } } diff --git a/src/engines/postsearch/mdn.rs b/src/engines/postsearch/mdn.rs index 1736b6c..d7d9acf 100644 --- a/src/engines/postsearch/mdn.rs +++ b/src/engines/postsearch/mdn.rs @@ -13,10 +13,11 @@ pub struct MdnConfig { pub fn request(response: &Response) -> Option { for search_result in response.search_results.iter().take(8) { if search_result + .result .url .starts_with("https://developer.mozilla.org/en-US/docs/Web") { - return Some(CLIENT.get(search_result.url.as_str())); + return Some(CLIENT.get(search_result.result.url.as_str())); } } diff --git a/src/engines/postsearch/minecraft_wiki.rs b/src/engines/postsearch/minecraft_wiki.rs index b29276c..f9ca074 100644 --- a/src/engines/postsearch/minecraft_wiki.rs +++ b/src/engines/postsearch/minecraft_wiki.rs @@ -5,8 +5,12 @@ use crate::engines::{HttpResponse, Response, CLIENT}; pub fn request(response: &Response) -> Option { for search_result in response.search_results.iter().take(8) { - if search_result.url.starts_with("https://minecraft.wiki/w/") { - return Some(CLIENT.get(search_result.url.as_str())); + if search_result + .result + .url + .starts_with("https://minecraft.wiki/w/") + { + return Some(CLIENT.get(search_result.result.url.as_str())); } } diff --git a/src/engines/postsearch/stackexchange.rs b/src/engines/postsearch/stackexchange.rs index f576b81..4349ded 100644 --- a/src/engines/postsearch/stackexchange.rs +++ b/src/engines/postsearch/stackexchange.rs @@ -7,9 +7,9 @@ use crate::engines::{answer::regex, Response, CLIENT}; pub fn request(response: &Response) -> Option { for search_result in response.search_results.iter().take(8) { if 
regex!(r"^https:\/\/(stackoverflow\.com|serverfault\.com|superuser\.com|\w{1,}\.stackexchange\.com)\/questions\/\d+") - .is_match(&search_result.url) + .is_match(&search_result.result.url) { - return Some(CLIENT.get(search_result.url.as_str())); + return Some(CLIENT.get(search_result.result.url.as_str())); } } diff --git a/src/engines/ranking.rs b/src/engines/ranking.rs new file mode 100644 index 0000000..40d6dc5 --- /dev/null +++ b/src/engines/ranking.rs @@ -0,0 +1,204 @@ +use std::{collections::HashMap, sync::Arc}; + +use crate::config::Config; + +use super::{ + Answer, AutocompleteResult, Engine, EngineImageResult, EngineImagesResponse, EngineResponse, + EngineSearchResult, FeaturedSnippet, ImagesResponse, Infobox, Response, SearchResult, +}; + +pub fn merge_engine_responses( + config: Arc, + responses: HashMap, +) -> Response { + let mut search_results: Vec> = Vec::new(); + let mut featured_snippet: Option = None; + let mut answer: Option = None; + let mut infobox: Option = None; + + for (engine, response) in responses { + let engine_config = config.engines.get(engine); + + for (result_index, search_result) in response.search_results.into_iter().enumerate() { + // position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a + // score of 0.33, etc. + let base_result_score = 1. / (result_index + 1) as f64; + let result_score = base_result_score * engine_config.weight; + + if let Some(existing_result) = search_results + .iter_mut() + .find(|r| r.result.url == search_result.url) + { + // if the weight of this engine is higher than every other one then replace the + // title and description + if engine_config.weight + > existing_result + .engines + .iter() + .map(|&other_engine| { + let other_engine_config = config.engines.get(other_engine); + other_engine_config.weight + }) + .max_by(|a, b| a.partial_cmp(b).unwrap()) + .unwrap_or(0.) 
+ { + existing_result.result.title = search_result.title; + existing_result.result.description = search_result.description; + } + + existing_result.engines.insert(engine); + existing_result.score += result_score; + } else { + search_results.push(SearchResult { + result: search_result, + engines: [engine].iter().copied().collect(), + score: result_score, + }); + } + } + + if let Some(engine_featured_snippet) = response.featured_snippet { + // if it has a higher weight than the current featured snippet + let featured_snippet_weight = featured_snippet.as_ref().map_or(0., |s| { + let other_engine_config = config.engines.get(s.engine); + other_engine_config.weight + }); + if engine_config.weight > featured_snippet_weight { + featured_snippet = Some(FeaturedSnippet { + url: engine_featured_snippet.url, + title: engine_featured_snippet.title, + description: engine_featured_snippet.description, + engine, + }); + } + } + + if let Some(engine_answer_html) = response.answer_html { + // if it has a higher weight than the current answer + let answer_weight = answer.as_ref().map_or(0., |s| { + let other_engine_config = config.engines.get(s.engine); + other_engine_config.weight + }); + if engine_config.weight > answer_weight { + answer = Some(Answer { + html: engine_answer_html, + engine, + }); + } + } + + if let Some(engine_infobox_html) = response.infobox_html { + // if it has a higher weight than the current infobox + let infobox_weight = infobox.as_ref().map_or(0., |s| { + let other_engine_config = config.engines.get(s.engine); + other_engine_config.weight + }); + if engine_config.weight > infobox_weight { + infobox = Some(Infobox { + html: engine_infobox_html, + engine, + }); + } + } + } + + search_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap()); + + Response { + search_results, + featured_snippet, + answer, + infobox, + config, + } +} + +pub fn merge_autocomplete_responses( + config: &Config, + responses: HashMap>, +) -> Vec { + let mut autocomplete_results: Vec = Vec::new(); + + for (engine, response) in responses { + let engine_config = config.engines.get(engine); + + for (result_index, autocomplete_result) in response.into_iter().enumerate() { + // position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a + // score of 0.33, etc. + let base_result_score = 1. / (result_index + 1) as f64; + let result_score = base_result_score * engine_config.weight; + + if let Some(existing_result) = autocomplete_results + .iter_mut() + .find(|r| r.query == autocomplete_result) + { + existing_result.score += result_score; + } else { + autocomplete_results.push(AutocompleteResult { + query: autocomplete_result, + score: result_score, + }); + } + } + } + + autocomplete_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap()); + + autocomplete_results.into_iter().map(|r| r.query).collect() +} + +pub fn merge_images_responses( + config: Arc, + responses: HashMap, +) -> ImagesResponse { + let mut image_results: Vec> = Vec::new(); + + for (engine, response) in responses { + let engine_config = config.engines.get(engine); + + for (result_index, image_result) in response.image_results.into_iter().enumerate() { + // position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a + // score of 0.33, etc. + let base_result_score = 1. 
/ (result_index + 1) as f64; + let result_score = base_result_score * engine_config.weight; + + if let Some(existing_result) = image_results + .iter_mut() + .find(|r| r.result.image_url == image_result.image_url) + { + // if the weight of this engine is higher than every other one then replace the + // title and page url + if engine_config.weight + > existing_result + .engines + .iter() + .map(|&other_engine| { + let other_engine_config = config.engines.get(other_engine); + other_engine_config.weight + }) + .max_by(|a, b| a.partial_cmp(b).unwrap()) + .unwrap_or(0.) + { + existing_result.result.title = image_result.title; + existing_result.result.page_url = image_result.page_url; + } + + existing_result.engines.insert(engine); + existing_result.score += result_score; + } else { + image_results.push(SearchResult { + result: image_result, + engines: [engine].iter().copied().collect(), + score: result_score, + }); + } + } + } + + image_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap()); + + ImagesResponse { + image_results, + config, + } +} diff --git a/src/engines/search/bing.rs b/src/engines/search/bing.rs index af00046..c0a9535 100644 --- a/src/engines/search/bing.rs +++ b/src/engines/search/bing.rs @@ -1,9 +1,11 @@ use base64::Engine; -use scraper::{ElementRef, Selector}; +use eyre::eyre; +use scraper::{ElementRef, Html, Selector}; +use tracing::warn; use url::Url; use crate::{ - engines::{EngineResponse, CLIENT}, + engines::{EngineImageResult, EngineImagesResponse, EngineResponse, CLIENT}, parse::{parse_html_response_with_opts, ParseOpts, QueryMethod}, }; @@ -64,6 +66,89 @@ pub fn parse_response(body: &str) -> eyre::Result { ) } +pub fn request_images(query: &str) -> reqwest::RequestBuilder { + CLIENT.get( + Url::parse_with_params( + "https://www.bing.com/images/async", + &[ + ("q", query), + ("async", "content"), + ("first", "1"), + ("count", "35"), + ], + ) + .unwrap(), + ) +} + +#[tracing::instrument] +pub fn parse_images_response(body: &str) -> eyre::Result { + let dom = Html::parse_document(body); + + let mut image_results = Vec::new(); + + let image_container_el_sel = Selector::parse(".imgpt").unwrap(); + let image_el_sel = Selector::parse(".iusc").unwrap(); + for image_container_el in dom.select(&image_container_el_sel) { + let image_el = image_container_el + .select(&image_el_sel) + .next() + .ok_or_else(|| eyre!("no image element found"))?; + + // parse the "m" attribute as json + let Some(data) = image_el.value().attr("m") else { + // this is normal, i think + continue; + }; + let data = serde_json::from_str::(data)?; + let page_url = data + .get("purl") + .and_then(|v| v.as_str()) + .unwrap_or_default(); + let image_url = data + // short for media url, probably + .get("murl") + .and_then(|v| v.as_str()) + .unwrap_or_default(); + let page_title = data + .get("t") + .and_then(|v| v.as_str()) + .unwrap_or_default() + // bing adds these unicode characters around matches + .replace('', "") + .replace('', ""); + + // the text looks like "1200 x 1600 · jpegWikipedia" + // (the last part is incorrectly parsed since the actual text is inside another + // element but this is already good enough for our purposes) + let text = image_container_el.text().collect::(); + let width_height: Vec = text + .split(" · ") + .next() + .unwrap_or_default() + .split(" x ") + .map(|s| s.parse().unwrap_or_default()) + .collect(); + let (width, height) = match width_height.as_slice() { + [width, height] => (*width, *height), + _ => { + warn!("couldn't get width and height from text 
\"{text}\""); + continue; + } + }; + + image_results.push(EngineImageResult { + page_url: page_url.to_string(), + image_url: image_url.to_string(), + title: page_title.to_string(), + width, + height, + }); + } + + Ok(EngineImagesResponse { image_results }) +} + fn clean_url(url: &str) -> eyre::Result { // clean up bing's tracking urls if url.starts_with("https://www.bing.com/ck/a?") { diff --git a/src/engines/search/google.rs b/src/engines/search/google.rs index 2b4aa58..1cd30b0 100644 --- a/src/engines/search/google.rs +++ b/src/engines/search/google.rs @@ -1,8 +1,10 @@ +use eyre::eyre; use scraper::{ElementRef, Selector}; +use tracing::warn; use url::Url; use crate::{ - engines::{EngineResponse, CLIENT}, + engines::{EngineImageResult, EngineImagesResponse, EngineResponse, CLIENT}, parse::{parse_html_response_with_opts, ParseOpts, QueryMethod}, }; @@ -10,8 +12,11 @@ pub fn request(query: &str) -> reqwest::RequestBuilder { CLIENT.get( Url::parse_with_params( "https://www.google.com/search", - // nfpr makes it not try to autocorrect - &[("q", query), ("nfpr", "1")], + &[ + ("q", query), + // nfpr makes it not try to autocorrect + ("nfpr", "1"), + ], ) .unwrap(), ) @@ -112,6 +117,92 @@ pub fn parse_autocomplete_response(body: &str) -> eyre::Result> { .collect()) } +pub fn request_images(query: &str) -> reqwest::RequestBuilder { + // ok so google also has a json api for images BUT it gives us less results + CLIENT.get( + Url::parse_with_params( + "https://www.google.com/search", + &[("q", query), ("udm", "2"), ("prmd", "ivsnmbtz")], + ) + .unwrap(), + ) +} + +pub fn parse_images_response(body: &str) -> eyre::Result { + // we can't just scrape the html because it won't give us the image sources, + // so... we have to scrape their internal json + + // iterate through every script until we find something that matches our regex + let internal_json_regex = + regex::Regex::new(r#"(?:\(function\(\)\{google\.jl=\{.+?)var \w=(\{".+?\});"#)?; + let mut internal_json = None; + let dom = scraper::Html::parse_document(body); + for script in dom.select(&Selector::parse("script").unwrap()) { + let script = script.inner_html(); + if let Some(captures) = internal_json_regex.captures(&script).and_then(|c| c.get(1)) { + internal_json = Some(captures.as_str().to_string()); + break; + } + } + + let internal_json = + internal_json.ok_or_else(|| eyre!("couldn't get internal json for google images"))?; + let internal_json: serde_json::Map = + serde_json::from_str(&internal_json)?; + + let mut image_results = Vec::new(); + for element_json in internal_json.values() { + // the internal json uses arrays instead of maps, which makes it kinda hard to + // use and also probably pretty unstable + + let Some(element_json) = element_json + .as_array() + .and_then(|a| a.get(1)) + .and_then(|v| v.as_array()) + else { + continue; + }; + + let Some((image_url, width, height)) = element_json + .get(3) + .and_then(|v| serde_json::from_value(v.clone()).ok()) + else { + warn!("couldn't get image data from google images json"); + continue; + }; + + // this is probably pretty brittle, hopefully google doesn't break it any time + // soon + let Some(page) = element_json + .get(9) + .and_then(|v| v.as_object()) + .and_then(|o| o.get("2003")) + .and_then(|v| v.as_array()) + else { + warn!("couldn't get page data from google images json"); + continue; + }; + let Some(page_url) = page.get(2).and_then(|v| v.as_str()).map(|s| s.to_string()) else { + warn!("couldn't get page url from google images json"); + continue; + }; + let Some(title) = 
page.get(3).and_then(|v| v.as_str()).map(|s| s.to_string()) else { + warn!("couldn't get page title from google images json"); + continue; + }; + + image_results.push(EngineImageResult { + image_url, + page_url, + title, + width, + height, + }); + } + + Ok(EngineImagesResponse { image_results }) +} + fn clean_url(url: &str) -> eyre::Result { if url.starts_with("/url?q=") { // get the q param diff --git a/src/web/assets/style.css b/src/web/assets/style.css index 2295bc2..427d8cb 100644 --- a/src/web/assets/style.css +++ b/src/web/assets/style.css @@ -58,6 +58,13 @@ main { background-color: var(--bg-2); min-height: 100%; } +.search-images > main { + /* image search uses 100% width */ + max-width: 100%; +} +.results-container.search-images { + max-width: none; +} @media screen and (max-width: 74rem) { /* small screens */ .results-container { @@ -145,6 +152,21 @@ h1 { background: var(--bg-4); } +/* search tabs (like images, if enabled) */ +.search-tabs { + display: flex; + gap: 0.5rem; + margin-bottom: 0.5rem; + margin-top: -0.5rem; +} +.search-tab { + border: 1px solid var(--bg-4); + padding: 0.25rem; +} +a.search-tab { + color: var(--link); +} + /* search result */ .search-result { padding-top: 1rem; @@ -298,7 +320,7 @@ h3.answer-thesaurus-category-title { text-decoration: underline; } .answer-notepad { - width: calc( 100% - 4px ); + width: calc(100% - 4px); height: fit-content; overflow-y: show; background-color: transparent; @@ -373,9 +395,56 @@ h3.answer-thesaurus-category-title { .infobox-minecraft_wiki-article > .notaninfobox { display: none !important; } -.noexcerpt, .navigation-not-searchable { +.noexcerpt, +.navigation-not-searchable { display: none !important; } .mcw-mainpage-icon { display: inline-block; -} \ No newline at end of file +} + +/* image results */ +.image-results { + display: flex; + flex-wrap: wrap; + gap: 0.5rem; +} +.image-result { + min-width: 12rem; + position: relative; +} +.image-result-img-container { + margin: 0 auto; + width: fit-content; +} +.image-result img { + height: 10.3rem; + width: auto; +} +.image-result-page-anchor { + display: block; + height: 2em; +} +.image-result-page-url { + overflow: hidden; + text-overflow: ellipsis; + + font-size: 0.8rem; + + white-space: nowrap; + width: 100%; + position: absolute; + display: block; +} +.image-result-title { + overflow: hidden; + text-overflow: ellipsis; + + font-size: 0.85rem; + + white-space: nowrap; + width: 100%; + position: absolute; + display: block; + margin-top: 1em; +} diff --git a/src/web/image_proxy.rs b/src/web/image_proxy.rs new file mode 100644 index 0000000..f82f1e8 --- /dev/null +++ b/src/web/image_proxy.rs @@ -0,0 +1,73 @@ +use std::{collections::HashMap, sync::Arc}; + +use axum::{ + extract::{Query, State}, + http::StatusCode, + response::{IntoResponse, Response}, +}; +use tracing::error; + +use crate::{config::Config, engines}; + +pub async fn route( + Query(params): Query>, + State(config): State>, +) -> Response { + let proxy_config = &config.image_search.proxy; + if !proxy_config.enabled.unwrap() { + return (StatusCode::FORBIDDEN, "Image proxy is disabled").into_response(); + }; + let url = params.get("url").cloned().unwrap_or_default(); + if url.is_empty() { + return (StatusCode::BAD_REQUEST, "Missing `url` parameter").into_response(); + } + + let mut res = match engines::CLIENT + .get(&url) + .header("accept", "image/*") + .send() + .await + { + Ok(res) => res, + Err(err) => { + error!("Image proxy error for {url}: {err}"); + return (StatusCode::INTERNAL_SERVER_ERROR, "Image proxy 
error").into_response(); + } + }; + + let max_size = proxy_config.max_download_size.unwrap(); + + if res.content_length().unwrap_or_default() > max_size { + return (StatusCode::PAYLOAD_TOO_LARGE, "Image too large").into_response(); + } + // validate content-type + let content_type = res + .headers() + .get(reqwest::header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .unwrap_or_default() + .to_string(); + if !content_type.starts_with("image/") { + return (StatusCode::BAD_REQUEST, "Not an image").into_response(); + } + + let mut image_bytes = Vec::new(); + while let Ok(Some(chunk)) = res.chunk().await { + image_bytes.extend_from_slice(&chunk); + if image_bytes.len() as u64 > max_size { + return (StatusCode::PAYLOAD_TOO_LARGE, "Image too large").into_response(); + } + } + + ( + [ + (axum::http::header::CONTENT_TYPE, content_type), + ( + axum::http::header::CACHE_CONTROL, + "public, max-age=31536000".to_owned(), + ), + ], + image_bytes, + ) + .into_response() +} diff --git a/src/web/mod.rs b/src/web/mod.rs index b4d23fc..11a58ba 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -1,4 +1,5 @@ pub mod autocomplete; +mod image_proxy; pub mod index; pub mod opensearch; pub mod search; @@ -45,6 +46,7 @@ pub async fn run(config: Config) { .route("/opensearch.xml", get(opensearch::route)) .route("/search", get(search::route)) .route("/autocomplete", get(autocomplete::route)) + .route("/image-proxy", get(image_proxy::route)) .with_state(Arc::new(config)); info!("Listening on http://{bind_addr}"); diff --git a/src/web/search.rs b/src/web/search.rs index ea98bf7..626678e 100644 --- a/src/web/search.rs +++ b/src/web/search.rs @@ -1,4 +1,7 @@ -use std::{collections::HashMap, net::SocketAddr, sync::Arc}; +mod all; +mod images; + +use std::{collections::HashMap, net::SocketAddr, str::FromStr, sync::Arc}; use async_stream::stream; use axum::{ @@ -8,142 +11,68 @@ use axum::{ response::IntoResponse, }; use bytes::Bytes; -use maud::{html, PreEscaped}; +use maud::{html, PreEscaped, DOCTYPE}; use crate::{ config::Config, - engines::{self, Engine, EngineProgressUpdate, ProgressUpdateData, Response, SearchQuery}, + engines::{ + self, Engine, EngineProgressUpdate, ProgressUpdateData, ResponseForTab, SearchQuery, + SearchTab, + }, }; -fn render_beginning_of_html(query: &str) -> String { +fn render_beginning_of_html(search: &SearchQuery) -> String { let head_html = html! { head { meta charset="UTF-8"; meta name="viewport" content="width=device-width, initial-scale=1.0"; title { - (query) + (search.query) " - metasearch" } link rel="stylesheet" href="/style.css"; script src="/script.js" defer {} link rel="search" type="application/opensearchdescription+xml" title="metasearch" href="/opensearch.xml"; } - }.into_string(); + }; let form_html = html! 
{
         form."search-form" action="/search" method="get" {
-            input #"search-input" type="text" name="q" placeholder="Search" value=(query) autofocus onfocus="this.select()" autocomplete="off";
+            input #"search-input" type="text" name="q" placeholder="Search" value=(search.query) autofocus onfocus="this.select()" autocomplete="off";
             input type="submit" value="Search";
         }
-    }.into_string();
+        @if search.config.image_search.enabled.unwrap() {
+            div.search-tabs {
+                @if search.tab == SearchTab::All { span.search-tab.selected { "All" } }
+                @else { a.search-tab href={ "?q=" (search.query) } { "All" } }
+                @if search.tab == SearchTab::Images { span.search-tab.selected { "Images" } }
+                @else { a.search-tab href={ "?q=" (search.query) "&tab=images" } { "Images" } }
+            }
+        }
+    };
 
-    format!(
-        r#"<!DOCTYPE html>
-<html lang="en">
-{head_html}
-<body>
-<div class="results-container">
-<main>
-{form_html}
-<div class="progress-updates">
-"#
-    )
+    // we don't close the elements here because we do chunked responses
+    html! {
+        (DOCTYPE)
+        html lang="en";
+        (head_html)
+        body;
+        div.results-container.{"search-" (search.tab.to_string())};
+        main;
+        (form_html)
+        div.progress-updates;
+    }
+    .into_string()
 }
 
 fn render_end_of_html() -> String {
     r"</main>
</div>
</body>
</html>
".to_string() } -fn render_engine_list(engines: &[engines::Engine], config: &Config) -> PreEscaped { - let mut html = String::new(); - for (i, engine) in engines.iter().enumerate() { - if config.ui.show_engine_list_separator.unwrap() && i > 0 { - html.push_str(" · "); - } - let raw_engine_id = &engine.id(); - let engine_id = if config.ui.show_engine_list_separator.unwrap() { - raw_engine_id.replace('_', " ") - } else { - raw_engine_id.to_string() - }; - html.push_str(&html! { span."engine-list-item" { (engine_id) } }.into_string()) +fn render_results_for_tab(response: ResponseForTab) -> PreEscaped { + match response { + ResponseForTab::All(r) => all::render_results(r), + ResponseForTab::Images(r) => images::render_results(r), } - html! { - div."engine-list" { - (PreEscaped(html)) - } - } -} - -fn render_search_result(result: &engines::SearchResult, config: &Config) -> PreEscaped { - html! { - div."search-result" { - a."search-result-anchor" rel="noreferrer" href=(result.url) { - span."search-result-url" { (result.url) } - h3."search-result-title" { (result.title) } - } - p."search-result-description" { (result.description) } - (render_engine_list(&result.engines.iter().copied().collect::>(), config)) - } - } -} - -fn render_featured_snippet( - featured_snippet: &engines::FeaturedSnippet, - config: &Config, -) -> PreEscaped { - html! { - div."featured-snippet" { - p."search-result-description" { (featured_snippet.description) } - a."search-result-anchor" rel="noreferrer" href=(featured_snippet.url) { - span."search-result-url" { (featured_snippet.url) } - h3."search-result-title" { (featured_snippet.title) } - } - (render_engine_list(&[featured_snippet.engine], config)) - } - } -} - -fn render_results(response: Response) -> PreEscaped { - let mut html = String::new(); - if let Some(infobox) = &response.infobox { - html.push_str( - &html! { - div."infobox" { - (infobox.html) - (render_engine_list(&[infobox.engine], &response.config)) - } - } - .into_string(), - ); - } - if let Some(answer) = &response.answer { - html.push_str( - &html! { - div."answer" { - (answer.html) - (render_engine_list(&[answer.engine], &response.config)) - } - } - .into_string(), - ); - } - if let Some(featured_snippet) = &response.featured_snippet { - html.push_str(&render_featured_snippet(featured_snippet, &response.config).into_string()); - } - for result in &response.search_results { - html.push_str(&render_search_result(result, &response.config).into_string()); - } - - if html.is_empty() { - html.push_str( - &html! { - p { "No results." } - } - .into_string(), - ); - } - - PreEscaped(html) } fn render_engine_progress_update( @@ -173,6 +102,27 @@ fn render_engine_progress_update( .into_string() } +pub fn render_engine_list(engines: &[engines::Engine], config: &Config) -> PreEscaped { + let mut html = String::new(); + for (i, engine) in engines.iter().enumerate() { + if config.ui.show_engine_list_separator.unwrap() && i > 0 { + html.push_str(" · "); + } + let raw_engine_id = &engine.id(); + let engine_id = if config.ui.show_engine_list_separator.unwrap() { + raw_engine_id.replace('_', " ") + } else { + raw_engine_id.to_string() + }; + html.push_str(&html! { span.engine-list-item { (engine_id) } }.into_string()) + } + html! 
{ + div.engine-list { + (PreEscaped(html)) + } + } +} + pub async fn route( Query(params): Query>, State(config): State>, @@ -197,8 +147,14 @@ pub async fn route( ); } + let search_tab = params + .get("tab") + .and_then(|t| SearchTab::from_str(t).ok()) + .unwrap_or_default(); + let query = SearchQuery { query, + tab: search_tab, request_headers: headers .clone() .into_iter() @@ -253,16 +209,11 @@ pub async fn route( second_part.push_str(""); // close progress-updates second_part.push_str(""); - second_part.push_str(&render_results(results).into_string()); + second_part.push_str(&render_results_for_tab(results).into_string()); yield Ok(Bytes::from(second_part)); }, ProgressUpdateData::PostSearchInfobox(infobox) => { - third_part.push_str(&html! { - div."infobox"."postsearch-infobox" { - (infobox.html) - (render_engine_list(&[infobox.engine], &config)) - } - }.into_string()); + third_part.push_str(&all::render_infobox(&infobox, &config).into_string()); } } } diff --git a/src/web/search/all.rs b/src/web/search/all.rs new file mode 100644 index 0000000..ce8e16e --- /dev/null +++ b/src/web/search/all.rs @@ -0,0 +1,93 @@ +//! Rendering results in the "all" tab. + +use maud::{html, PreEscaped}; + +use crate::{ + config::Config, + engines::{self, EngineSearchResult, Infobox, Response}, + web::search::render_engine_list, +}; + +pub fn render_results(response: Response) -> PreEscaped { + let mut html = String::new(); + if let Some(infobox) = &response.infobox { + html.push_str( + &html! { + div."infobox" { + (infobox.html) + (render_engine_list(&[infobox.engine], &response.config)) + } + } + .into_string(), + ); + } + if let Some(answer) = &response.answer { + html.push_str( + &html! { + div."answer" { + (answer.html) + (render_engine_list(&[answer.engine], &response.config)) + } + } + .into_string(), + ); + } + if let Some(featured_snippet) = &response.featured_snippet { + html.push_str(&render_featured_snippet(featured_snippet, &response.config).into_string()); + } + for result in &response.search_results { + html.push_str(&render_search_result(result, &response.config).into_string()); + } + + if html.is_empty() { + html.push_str( + &html! { + p { "No results." } + } + .into_string(), + ); + } + + PreEscaped(html) +} + +fn render_search_result( + result: &engines::SearchResult, + config: &Config, +) -> PreEscaped { + html! { + div."search-result" { + a."search-result-anchor" rel="noreferrer" href=(result.result.url) { + span."search-result-url" { (result.result.url) } + h3."search-result-title" { (result.result.title) } + } + p."search-result-description" { (result.result.description) } + (render_engine_list(&result.engines.iter().copied().collect::>(), config)) + } + } +} + +fn render_featured_snippet( + featured_snippet: &engines::FeaturedSnippet, + config: &Config, +) -> PreEscaped { + html! { + div."featured-snippet" { + p."search-result-description" { (featured_snippet.description) } + a."search-result-anchor" rel="noreferrer" href=(featured_snippet.url) { + span."search-result-url" { (featured_snippet.url) } + h3."search-result-title" { (featured_snippet.title) } + } + (render_engine_list(&[featured_snippet.engine], config)) + } + } +} + +pub fn render_infobox(infobox: &Infobox, config: &Config) -> PreEscaped { + html! 
{
+        div."infobox"."postsearch-infobox" {
+            (infobox.html)
+            (render_engine_list(&[infobox.engine], &config))
+        }
+    }
+}
diff --git a/src/web/search/images.rs b/src/web/search/images.rs
new file mode 100644
index 0000000..846d96d
--- /dev/null
+++ b/src/web/search/images.rs
@@ -0,0 +1,48 @@
+use maud::{html, PreEscaped};
+
+use crate::{
+    config::Config,
+    engines::{self, EngineImageResult, ImagesResponse},
+    web::search::render_engine_list,
+};
+
+pub fn render_results(response: ImagesResponse) -> PreEscaped<String> {
+    html! {
+        div.image-results {
+            @for image in &response.image_results {
+                (render_image_result(image, &response.config))
+            }
+        }
+    }
+}
+
+fn render_image_result(
+    result: &engines::SearchResult<EngineImageResult>,
+    config: &Config,
+) -> PreEscaped<String> {
+    let original_image_src = &result.result.image_url;
+    let image_src = if config.image_search.proxy.enabled.unwrap() {
+        // serialize url params
+        let escaped_param =
+            url::form_urlencoded::byte_serialize(original_image_src.as_bytes()).collect::<String>();
+        format!("/image-proxy?url={}", escaped_param)
+    } else {
+        original_image_src.to_string()
+    };
+    html! {
+        div.image-result {
+            a.image-result-anchor rel="noreferrer" href=(original_image_src) target="_blank" {
+                div.image-result-img-container {
+                    img loading="lazy" src=(image_src) width=(result.result.width) height=(result.result.height);
+                }
+            }
+            a.image-result-page-anchor href=(result.result.page_url) {
+                span.image-result-page-url.search-result-url { (result.result.page_url) }
+                span.image-result-title { (result.result.title) }
+            }
+            @if config.image_search.show_engines.unwrap() {
+                {(render_engine_list(&result.engines.iter().copied().collect::<Vec<_>>(), &config))}
+            }
+        }
+    }
+}
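
A quick sanity check for the scoring math that the new src/engines/ranking.rs
applies to both the "all" and "images" tabs (not part of the patch; assumes the
config-base.toml defaults above, google at weight 1.05 and bing at 1.0): each
engine contributes weight * 1/(position + 1) per result, and scores for the
same URL are summed across engines.

    #[test]
    fn reciprocal_rank_scoring() {
        let (google, bing) = (1.05_f64, 1.0_f64);
        // a url ranked 1st by google and 2nd by bing
        let merged = google / 1.0 + bing / 2.0;
        assert!((merged - 1.55).abs() < 1e-9);
        // it outranks a url ranked 1st by bing alone
        assert!(merged > bing / 1.0);
    }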