Image search (#10)

* add tab for image search

* add basic implementation of image search

* add proxy

* fix google images regex breaking when the query has spaces

* fix sizing of image elements while they're loading

* add optional engines indicator to image search

* add bing images

* fix some parsing issues with bing images

* fix bing titles
mat 2024-05-20 04:08:49 -05:00 committed by GitHub
parent 878510bcb2
commit ef65e60f9f
20 changed files with 1084 additions and 388 deletions

Cargo.lock (generated)

@@ -1841,6 +1841,7 @@ version = "1.0.114"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0"
 dependencies = [
+ "indexmap",
  "itoa",
  "ryu",
  "serde",

Cargo.toml

@@ -28,15 +28,11 @@ numbat = "1.11.0"
 once_cell = "1.19.0"
 rand = "0.8.5"
 regex = "1.10.3"
-reqwest = { version = "0.11.26", default-features = false, features = [
-    "rustls-tls",
-    "gzip",
-    "deflate",
-    "brotli",
-] }
+reqwest = { version = "0.11.26", default-features = false, features = ["rustls-tls", "gzip", "deflate", "brotli"] }
 scraper = "0.19.0"
 serde = { version = "1.0.197", features = ["derive"] }
-serde_json = "1.0.114"
+# preserve_order is needed for google images. yippee!
+serde_json = { version = "1.0.114", features = ["preserve_order"] }
 tokio = { version = "1.36.0", features = ["rt", "macros"] }
 tokio-stream = "0.1.15"
 toml = { version = "0.8.12", default-features = false, features = ["parse"] }

config-base.toml

@@ -6,6 +6,11 @@ bind = "0.0.0.0:28019"
 show_engine_list_separator = false
 show_version_info = false
 
+[image_search]
+enabled = false
+show_engines = true
+proxy = { enabled = true, max_download_size = 10_000_000 }
+
 [engines]
 google = { weight = 1.05 }
 bing = { weight = 1.0 }

src/config.rs

@@ -12,6 +12,8 @@ pub struct Config {
     #[serde(default)]
     pub ui: UiConfig,
     #[serde(default)]
+    pub image_search: ImageSearchConfig,
+    #[serde(default)]
     pub engines: EnginesConfig,
 }
@@ -23,12 +25,51 @@ pub struct UiConfig {
     pub show_version_info: Option<bool>,
 }
#[derive(Deserialize, Debug, Default)]
pub struct ImageSearchConfig {
pub enabled: Option<bool>,
pub show_engines: Option<bool>,
#[serde(default)]
pub proxy: ImageProxyConfig,
}
#[derive(Deserialize, Debug, Default)]
pub struct ImageProxyConfig {
/// Whether we should proxy remote images through our server. This is mostly
/// a privacy feature.
pub enabled: Option<bool>,
/// The maximum size of an image that can be proxied. This is in bytes.
pub max_download_size: Option<u64>,
}
 #[derive(Deserialize, Debug, Default)]
 pub struct EnginesConfig {
     #[serde(flatten)]
     pub map: HashMap<Engine, DefaultableEngineConfig>,
 }
#[derive(Deserialize, Clone, Debug)]
#[serde(untagged)]
pub enum DefaultableEngineConfig {
Boolean(bool),
Full(FullEngineConfig),
}
#[derive(Deserialize, Clone, Debug)]
pub struct FullEngineConfig {
#[serde(default = "fn_true")]
pub enabled: bool,
/// The priority of this engine relative to the other engines. The default
/// is 1, and a value of 0 is treated as the default.
#[serde(default)]
pub weight: f64,
/// Per-engine configs. These are parsed at request time.
#[serde(flatten)]
#[serde(default)]
pub extra: toml::Table,
}
 impl Config {
     pub fn read_or_create(config_path: &Path) -> eyre::Result<Self> {
         let base_config_str = include_str!("../config-base.toml");
@@ -50,20 +91,39 @@ impl Config {
     // use the default for something.
     pub fn update(&mut self, new: Config) {
         self.bind = new.bind;
-        self.ui.show_engine_list_separator = new
-            .ui
-            .show_engine_list_separator
-            .or(self.ui.show_engine_list_separator);
-        assert_ne!(self.ui.show_engine_list_separator, None);
-        self.ui.show_version_info = new.ui.show_version_info.or(self.ui.show_version_info);
-        assert_ne!(self.ui.show_version_info, None);
-        for (key, new) in new.engines.map {
-            if let Some(existing) = self.engines.map.get_mut(&key) {
-                existing.update(new);
-            } else {
-                self.engines.map.insert(key, new);
-            }
-        }
+        self.ui.update(new.ui);
+        self.image_search.update(new.image_search);
+        self.engines.update(new.engines);
+    }
+}
+
+impl UiConfig {
+    pub fn update(&mut self, new: UiConfig) {
+        self.show_engine_list_separator = new
+            .show_engine_list_separator
+            .or(self.show_engine_list_separator);
+        assert_ne!(self.show_engine_list_separator, None);
+        self.show_version_info = new.show_version_info.or(self.show_version_info);
+        assert_ne!(self.show_version_info, None);
+    }
+}
+
+impl ImageSearchConfig {
+    pub fn update(&mut self, new: ImageSearchConfig) {
+        self.enabled = new.enabled.or(self.enabled);
+        assert_ne!(self.enabled, None);
+        self.show_engines = new.show_engines.or(self.show_engines);
+        assert_ne!(self.show_engines, None);
+        self.proxy.update(new.proxy);
+    }
+}
+
+impl ImageProxyConfig {
+    pub fn update(&mut self, new: ImageProxyConfig) {
+        self.enabled = new.enabled.or(self.enabled);
+        assert_ne!(self.enabled, None);
+        self.max_download_size = new.max_download_size.or(self.max_download_size);
+        assert_ne!(self.max_download_size, None);
     }
 }
@@ -91,13 +151,16 @@ impl EnginesConfig {
             None => &DEFAULT_ENABLED_FULL_ENGINE_CONFIG,
         }
     }
-}
 
-#[derive(Deserialize, Clone, Debug)]
-#[serde(untagged)]
-pub enum DefaultableEngineConfig {
-    Boolean(bool),
-    Full(FullEngineConfig),
+    pub fn update(&mut self, new: Self) {
+        for (key, new) in new.map {
+            if let Some(existing) = self.map.get_mut(&key) {
+                existing.update(new);
+            } else {
+                self.map.insert(key, new);
+            }
+        }
+    }
 }
 
 impl DefaultableEngineConfig {
@@ -115,24 +178,9 @@ impl Default for DefaultableEngineConfig {
     }
 }
 
-#[derive(Deserialize, Clone, Debug)]
-pub struct FullEngineConfig {
-    #[serde(default = "default_true")]
-    pub enabled: bool,
-    /// The priority of this engine relative to the other engines. The default
-    /// is 1, and a value of 0 is treated as the default.
-    #[serde(default)]
-    pub weight: f64,
-    /// Per-engine configs. These are parsed at request time.
-    #[serde(flatten)]
-    #[serde(default)]
-    pub extra: toml::Table,
-}
-
 // serde expects a function as the default, this just exists so "enabled" is
 // always true by default
-fn default_true() -> bool {
+fn fn_true() -> bool {
     true
 }
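The layering pattern in this file is worth noting: every user-facing field is an `Option`, merged over the base config with `Option::or`, and the `assert_ne!` calls encode the invariant that `config-base.toml` supplies a value for every option. A minimal self-contained sketch of the same pattern (`Layer` is a hypothetical stand-in for `UiConfig` or `ImageProxyConfig`, not a type from this repo):

```rust
// Minimal sketch of the Option::or layering used by the update() methods.
#[derive(Debug, Default, PartialEq)]
struct Layer {
    enabled: Option<bool>,
}

impl Layer {
    fn update(&mut self, new: Layer) {
        // a value set by the user wins; otherwise keep the base value
        self.enabled = new.enabled.or(self.enabled);
        // the base config is expected to set every field, so None is a bug
        assert_ne!(self.enabled, None);
    }
}

fn main() {
    let mut base = Layer { enabled: Some(true) }; // from the base config
    base.update(Layer { enabled: None });         // user config left it unset
    assert_eq!(base.enabled, Some(true));         // base value survives
}
```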

src/engines/macros.rs

@@ -126,3 +126,31 @@ macro_rules! engine_postsearch_requests {
         }
     };
 }
+
+#[macro_export]
+macro_rules! engine_image_requests {
+    ($($engine:ident => $module:ident::$engine_id:ident::$request:ident, $parse_response:ident),* $(,)?) => {
+        impl Engine {
+            #[must_use]
+            pub fn request_images(&self, query: &SearchQuery) -> RequestResponse {
+                match self {
+                    $(
+                        Engine::$engine => $module::$engine_id::$request(query).into(),
+                    )*
+                    _ => RequestResponse::None,
+                }
+            }
+
+            pub fn parse_images_response(&self, res: &HttpResponse) -> eyre::Result<EngineImagesResponse> {
+                #[allow(clippy::useless_conversion)]
+                match self {
+                    $(
+                        Engine::$engine => $crate::engine_parse_response! { res, $module::$engine_id::$parse_response }
+                            .ok_or_else(|| eyre::eyre!("engine {self:?} can't parse images response"))?,
+                    )*
+                    _ => eyre::bail!("engine {self:?} can't parse response"),
+                }
+            }
+        }
+    };
+}
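For orientation, the `engine_image_requests!` invocation in `src/engines/mod.rs` (further down in this commit) expands to roughly the following for the `request_images` half. This is a hand-written sketch of the expansion, not compiler output:

```rust
// Rough hand expansion of engine_image_requests! for the two listed engines.
impl Engine {
    #[must_use]
    pub fn request_images(&self, query: &SearchQuery) -> RequestResponse {
        match self {
            Engine::Google => search::google::request_images(query).into(),
            Engine::Bing => search::bing::request_images(query).into(),
            // engines with no image implementation simply make no request
            _ => RequestResponse::None,
        }
    }
}
```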

src/engines/mod.rs

@@ -1,25 +1,26 @@
 use std::{
     collections::{BTreeSet, HashMap},
-    fmt,
+    fmt::{self, Display},
     net::IpAddr,
     ops::Deref,
     str::FromStr,
     sync::Arc,
-    time::Instant,
+    time::{Duration, Instant},
 };
 
 use futures::future::join_all;
 use maud::PreEscaped;
 use once_cell::sync::Lazy;
-use reqwest::header::HeaderMap;
+use reqwest::{header::HeaderMap, RequestBuilder};
 use serde::{Deserialize, Deserializer};
 use tokio::sync::mpsc;
 use tracing::{error, info};
 
 mod macros;
+mod ranking;
 
 use crate::{
-    config::Config, engine_autocomplete_requests, engine_postsearch_requests, engine_requests,
-    engines,
+    config::Config, engine_autocomplete_requests, engine_image_requests,
+    engine_postsearch_requests, engine_requests, engines,
 };
 
 pub mod answer;
@@ -90,6 +91,11 @@ engine_postsearch_requests! {
     StackExchange => postsearch::stackexchange::request, parse_response,
 }
 
+engine_image_requests! {
+    Google => search::google::request_images, parse_images_response,
+    Bing => search::bing::request_images, parse_images_response,
+}
+
 impl fmt::Display for Engine {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         write!(f, "{}", self.id())
@@ -108,6 +114,7 @@ impl<'de> Deserialize<'de> for Engine {
 pub struct SearchQuery {
     pub query: String,
+    pub tab: SearchTab,
     pub request_headers: HashMap<String, String>,
     pub ip: String,
     /// The config is part of the query so it's possible to make a query with a
@@ -123,6 +130,31 @@ impl Deref for SearchQuery {
     }
 }
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub enum SearchTab {
#[default]
All,
Images,
}
impl FromStr for SearchTab {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"all" => Ok(Self::All),
"images" => Ok(Self::Images),
_ => Err(()),
}
}
}
impl Display for SearchTab {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::All => write!(f, "all"),
Self::Images => write!(f, "images"),
}
}
}
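The `FromStr` and `Display` impls are deliberate inverses here, so the `tab` query parameter round-trips cleanly, and unknown values fall back to the default tab. A quick illustrative check:

```rust
use std::str::FromStr;

assert_eq!(SearchTab::from_str("images"), Ok(SearchTab::Images));
assert_eq!(SearchTab::Images.to_string(), "images");
// anything unrecognized becomes the default "all" tab
assert_eq!(SearchTab::from_str("bogus").unwrap_or_default(), SearchTab::All);
```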
 pub enum RequestResponse {
     None,
     Http(reqwest::RequestBuilder),

@@ -172,7 +204,7 @@ impl From<HttpResponse> for reqwest::Response {
     }
 }
 
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct EngineSearchResult {
     pub url: String,
     pub title: String,
@@ -194,6 +226,11 @@ pub struct EngineResponse {
     pub infobox_html: Option<PreEscaped<String>>,
 }
#[derive(Default)]
pub struct EngineImagesResponse {
pub image_results: Vec<EngineImageResult>,
}
 impl EngineResponse {
     #[must_use]
     pub fn new() -> Self {

@@ -217,6 +254,22 @@ impl EngineResponse {
     }
 }
impl EngineImagesResponse {
#[must_use]
pub fn new() -> Self {
Self::default()
}
}
#[derive(Debug, Clone)]
pub struct EngineImageResult {
pub image_url: String,
pub page_url: String,
pub title: String,
pub width: u64,
pub height: u64,
}
 #[derive(Debug)]
 pub enum EngineProgressUpdate {
     Requesting,
@@ -231,7 +284,7 @@ pub enum ProgressUpdateData {
         engine: Engine,
         update: EngineProgressUpdate,
     },
-    Response(Response),
+    Response(ResponseForTab),
     PostSearchInfobox(Infobox),
 }
@@ -251,17 +304,40 @@ impl ProgressUpdate {
     }
 }
 
-#[tracing::instrument(fields(query = %query.query), skip(progress_tx))]
-pub async fn search(
+async fn make_request(
+    request: RequestBuilder,
+    engine: Engine,
     query: &SearchQuery,
-    progress_tx: mpsc::UnboundedSender<ProgressUpdate>,
-) -> eyre::Result<()> {
-    let start_time = Instant::now();
-
-    info!("Doing search");
-
-    let progress_tx = &progress_tx;
-
+    send_engine_progress_update: impl Fn(Engine, EngineProgressUpdate),
+) -> eyre::Result<HttpResponse> {
+    send_engine_progress_update(engine, EngineProgressUpdate::Requesting);
+
+    let mut res = request.send().await?;
+
+    send_engine_progress_update(engine, EngineProgressUpdate::Downloading);
+
+    let mut body_bytes = Vec::new();
+    while let Some(chunk) = res.chunk().await? {
+        body_bytes.extend_from_slice(&chunk);
+    }
+    let body = String::from_utf8_lossy(&body_bytes).to_string();
+
+    send_engine_progress_update(engine, EngineProgressUpdate::Parsing);
+
+    let http_response = HttpResponse {
+        res,
+        body,
+        config: query.config.clone(),
+    };
+    Ok(http_response)
+}
+
+async fn make_requests(
+    query: &SearchQuery,
+    progress_tx: &mpsc::UnboundedSender<ProgressUpdate>,
+    start_time: Instant,
+    send_engine_progress_update: &impl Fn(Engine, EngineProgressUpdate),
+) -> eyre::Result<()> {
     let mut requests = Vec::new();
     for &engine in Engine::all() {
         let engine_config = query.config.engines.get(engine);
@@ -274,59 +350,18 @@ pub async fn search(
             let response = match request_response {
                 RequestResponse::Http(request) => {
-                    progress_tx.send(ProgressUpdate::new(
-                        ProgressUpdateData::Engine {
-                            engine,
-                            update: EngineProgressUpdate::Requesting,
-                        },
-                        start_time,
-                    ))?;
-
-                    let mut res = request.send().await?;
-
-                    progress_tx.send(ProgressUpdate::new(
-                        ProgressUpdateData::Engine {
-                            engine,
-                            update: EngineProgressUpdate::Downloading,
-                        },
-                        start_time,
-                    ))?;
-
-                    let mut body_bytes = Vec::new();
-                    while let Some(chunk) = res.chunk().await? {
-                        body_bytes.extend_from_slice(&chunk);
-                    }
-                    let body = String::from_utf8_lossy(&body_bytes).to_string();
-
-                    progress_tx.send(ProgressUpdate::new(
-                        ProgressUpdateData::Engine {
-                            engine,
-                            update: EngineProgressUpdate::Parsing,
-                        },
-                        start_time,
-                    ))?;
-
-                    let http_response = HttpResponse {
-                        res,
-                        body,
-                        config: query.config.clone(),
-                    };
+                    let http_response =
+                        make_request(request, engine, query, send_engine_progress_update).await?;
 
                     let response = match engine.parse_response(&http_response) {
                         Ok(response) => response,
                         Err(e) => {
-                            error!("parse error: {e}");
+                            error!("parse error for {engine}: {e}");
                             EngineResponse::new()
                         }
                     };
 
-                    progress_tx.send(ProgressUpdate::new(
-                        ProgressUpdateData::Engine {
-                            engine,
-                            update: EngineProgressUpdate::Done,
-                        },
-                        start_time,
-                    ))?;
+                    send_engine_progress_update(engine, EngineProgressUpdate::Done);
 
                     response
                 }
@@ -347,12 +382,10 @@ pub async fn search(
         join_all(response_futures).await.into_iter().collect();
     let responses = responses_result?;
 
-    let response = merge_engine_responses(query.config.clone(), responses);
-
+    let response = ranking::merge_engine_responses(query.config.clone(), responses);
     let has_infobox = response.infobox.is_some();
-
     progress_tx.send(ProgressUpdate::new(
-        ProgressUpdateData::Response(response.clone()),
+        ProgressUpdateData::Response(ResponseForTab::All(response.clone())),
         start_time,
     ))?;
@@ -420,6 +453,98 @@ pub async fn search(
     Ok(())
 }
async fn make_image_requests(
query: &SearchQuery,
progress_tx: &mpsc::UnboundedSender<ProgressUpdate>,
start_time: Instant,
send_engine_progress_update: &impl Fn(Engine, EngineProgressUpdate),
) -> eyre::Result<()> {
let mut requests = Vec::new();
for &engine in Engine::all() {
let engine_config = query.config.engines.get(engine);
if !engine_config.enabled {
continue;
}
requests.push(async move {
let request_response = engine.request_images(query);
let response = match request_response {
RequestResponse::Http(request) => {
let http_response =
make_request(request, engine, query, send_engine_progress_update).await?;
let response = match engine.parse_images_response(&http_response) {
Ok(response) => response,
Err(e) => {
error!("parse error for {engine} (images): {e}");
EngineImagesResponse::new()
}
};
send_engine_progress_update(engine, EngineProgressUpdate::Done);
response
}
RequestResponse::Instant(_) => {
error!("unexpected instant response for image request");
EngineImagesResponse::new()
}
RequestResponse::None => EngineImagesResponse::new(),
};
Ok((engine, response))
});
}
let mut response_futures = Vec::new();
for request in requests {
response_futures.push(request);
}
let responses_result: eyre::Result<HashMap<_, _>> =
join_all(response_futures).await.into_iter().collect();
let responses = responses_result?;
let response = ranking::merge_images_responses(query.config.clone(), responses);
progress_tx.send(ProgressUpdate::new(
ProgressUpdateData::Response(ResponseForTab::Images(response.clone())),
start_time,
))?;
Ok(())
}
#[tracing::instrument(fields(query = %query.query), skip(progress_tx))]
pub async fn search(
query: &SearchQuery,
progress_tx: mpsc::UnboundedSender<ProgressUpdate>,
) -> eyre::Result<()> {
let start_time = Instant::now();
info!("Doing search");
let progress_tx = &progress_tx;
let send_engine_progress_update = |engine: Engine, update: EngineProgressUpdate| {
let _ = progress_tx.send(ProgressUpdate::new(
ProgressUpdateData::Engine { engine, update },
start_time,
));
};
match query.tab {
SearchTab::All => {
make_requests(query, progress_tx, start_time, &send_engine_progress_update).await?
}
SearchTab::Images => {
make_image_requests(query, progress_tx, start_time, &send_engine_progress_update)
.await?
}
}
Ok(())
}
 pub async fn autocomplete(config: &Config, query: &str) -> eyre::Result<Vec<String>> {
     let mut requests = Vec::new();
     for &engine in Engine::all() {

@@ -452,7 +577,10 @@ pub async fn autocomplete(config: &Config, query: &str) -> eyre::Result<Vec<String>> {
         join_all(autocomplete_futures).await.into_iter().collect();
     let autocomplete_results = autocomplete_results_result?;
 
-    Ok(merge_autocomplete_responses(config, autocomplete_results))
+    Ok(ranking::merge_autocomplete_responses(
+        config,
+        autocomplete_results,
+    ))
 }
 pub static CLIENT: Lazy<reqwest::Client> = Lazy::new(|| {

@@ -466,13 +594,14 @@ pub static CLIENT: Lazy<reqwest::Client> = Lazy::new(|| {
         headers.insert("Accept-Language", "en-US,en;q=0.5".parse().unwrap());
         headers
     })
+    .timeout(Duration::from_secs(10))
     .build()
     .unwrap()
 });
 #[derive(Debug, Clone)]
 pub struct Response {
-    pub search_results: Vec<SearchResult>,
+    pub search_results: Vec<SearchResult<EngineSearchResult>>,
     pub featured_snippet: Option<FeaturedSnippet>,
     pub answer: Option<Answer>,
     pub infobox: Option<Infobox>,

@@ -480,10 +609,20 @@ pub struct Response {
 }
 
 #[derive(Debug, Clone)]
-pub struct SearchResult {
-    pub url: String,
-    pub title: String,
-    pub description: String,
+pub struct ImagesResponse {
+    pub image_results: Vec<SearchResult<EngineImageResult>>,
+    pub config: Arc<Config>,
+}
+
+#[derive(Debug, Clone)]
+pub enum ResponseForTab {
+    All(Response),
+    Images(ImagesResponse),
+}
+
+#[derive(Debug, Clone)]
+pub struct SearchResult<R> {
+    pub result: R,
     pub engines: BTreeSet<Engine>,
     pub score: f64,
 }
@@ -508,149 +647,7 @@ pub struct Infobox {
     pub engine: Engine,
 }
 
-fn merge_engine_responses(
-    config: Arc<Config>,
-    responses: HashMap<Engine, EngineResponse>,
-) -> Response {
-    // ...body moved to src/engines/ranking.rs (shown in full below), with
-    // results now wrapped in SearchResult<EngineSearchResult> and their
-    // fields accessed through `.result`...
-}
 
 pub struct AutocompleteResult {
     pub query: String,
     pub score: f64,
 }
 
-fn merge_autocomplete_responses(
-    config: &Config,
-    responses: HashMap<Engine, Vec<String>>,
-) -> Vec<String> {
-    // ...body moved unchanged to src/engines/ranking.rs (shown in full below)...
-}

src/engines/postsearch/docs_rs.rs

@@ -5,8 +5,8 @@ use crate::engines::{HttpResponse, Response, CLIENT};
 pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
     for search_result in response.search_results.iter().take(8) {
-        if search_result.url.starts_with("https://docs.rs/") {
-            return Some(CLIENT.get(search_result.url.as_str()));
+        if search_result.result.url.starts_with("https://docs.rs/") {
+            return Some(CLIENT.get(search_result.result.url.as_str()));
         }
     }

src/engines/postsearch/github.rs

@@ -6,8 +6,8 @@ use crate::engines::{answer::regex, Response, CLIENT};
 pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
     for search_result in response.search_results.iter().take(8) {
-        if regex!(r"^https:\/\/github\.com\/[\w-]+\/[\w.-]+$").is_match(&search_result.url) {
-            return Some(CLIENT.get(search_result.url.as_str()));
+        if regex!(r"^https:\/\/github\.com\/[\w-]+\/[\w.-]+$").is_match(&search_result.result.url) {
+            return Some(CLIENT.get(search_result.result.url.as_str()));
         }
     }

src/engines/postsearch/mdn.rs

@@ -13,10 +13,11 @@ pub struct MdnConfig {
 pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
     for search_result in response.search_results.iter().take(8) {
         if search_result
+            .result
             .url
             .starts_with("https://developer.mozilla.org/en-US/docs/Web")
         {
-            return Some(CLIENT.get(search_result.url.as_str()));
+            return Some(CLIENT.get(search_result.result.url.as_str()));
         }
     }

src/engines/postsearch/minecraft_wiki.rs

@@ -5,8 +5,12 @@ use crate::engines::{HttpResponse, Response, CLIENT};
 pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
     for search_result in response.search_results.iter().take(8) {
-        if search_result.url.starts_with("https://minecraft.wiki/w/") {
-            return Some(CLIENT.get(search_result.url.as_str()));
+        if search_result
+            .result
+            .url
+            .starts_with("https://minecraft.wiki/w/")
+        {
+            return Some(CLIENT.get(search_result.result.url.as_str()));
         }
     }

src/engines/postsearch/stackexchange.rs

@@ -7,9 +7,9 @@ use crate::engines::{answer::regex, Response, CLIENT};
 pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
     for search_result in response.search_results.iter().take(8) {
         if regex!(r"^https:\/\/(stackoverflow\.com|serverfault\.com|superuser\.com|\w{1,}\.stackexchange\.com)\/questions\/\d+")
-            .is_match(&search_result.url)
+            .is_match(&search_result.result.url)
         {
-            return Some(CLIENT.get(search_result.url.as_str()));
+            return Some(CLIENT.get(search_result.result.url.as_str()));
         }
     }

src/engines/ranking.rs (new file)

@@ -0,0 +1,204 @@
use std::{collections::HashMap, sync::Arc};
use crate::config::Config;
use super::{
Answer, AutocompleteResult, Engine, EngineImageResult, EngineImagesResponse, EngineResponse,
EngineSearchResult, FeaturedSnippet, ImagesResponse, Infobox, Response, SearchResult,
};
pub fn merge_engine_responses(
config: Arc<Config>,
responses: HashMap<Engine, EngineResponse>,
) -> Response {
let mut search_results: Vec<SearchResult<EngineSearchResult>> = Vec::new();
let mut featured_snippet: Option<FeaturedSnippet> = None;
let mut answer: Option<Answer> = None;
let mut infobox: Option<Infobox> = None;
for (engine, response) in responses {
let engine_config = config.engines.get(engine);
for (result_index, search_result) in response.search_results.into_iter().enumerate() {
// position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a
// score of 0.33, etc.
let base_result_score = 1. / (result_index + 1) as f64;
let result_score = base_result_score * engine_config.weight;
if let Some(existing_result) = search_results
.iter_mut()
.find(|r| r.result.url == search_result.url)
{
// if the weight of this engine is higher than every other one then replace the
// title and description
if engine_config.weight
> existing_result
.engines
.iter()
.map(|&other_engine| {
let other_engine_config = config.engines.get(other_engine);
other_engine_config.weight
})
.max_by(|a, b| a.partial_cmp(b).unwrap())
.unwrap_or(0.)
{
existing_result.result.title = search_result.title;
existing_result.result.description = search_result.description;
}
existing_result.engines.insert(engine);
existing_result.score += result_score;
} else {
search_results.push(SearchResult {
result: search_result,
engines: [engine].iter().copied().collect(),
score: result_score,
});
}
}
if let Some(engine_featured_snippet) = response.featured_snippet {
// if it has a higher weight than the current featured snippet
let featured_snippet_weight = featured_snippet.as_ref().map_or(0., |s| {
let other_engine_config = config.engines.get(s.engine);
other_engine_config.weight
});
if engine_config.weight > featured_snippet_weight {
featured_snippet = Some(FeaturedSnippet {
url: engine_featured_snippet.url,
title: engine_featured_snippet.title,
description: engine_featured_snippet.description,
engine,
});
}
}
if let Some(engine_answer_html) = response.answer_html {
// if it has a higher weight than the current answer
let answer_weight = answer.as_ref().map_or(0., |s| {
let other_engine_config = config.engines.get(s.engine);
other_engine_config.weight
});
if engine_config.weight > answer_weight {
answer = Some(Answer {
html: engine_answer_html,
engine,
});
}
}
if let Some(engine_infobox_html) = response.infobox_html {
// if it has a higher weight than the current infobox
let infobox_weight = infobox.as_ref().map_or(0., |s| {
let other_engine_config = config.engines.get(s.engine);
other_engine_config.weight
});
if engine_config.weight > infobox_weight {
infobox = Some(Infobox {
html: engine_infobox_html,
engine,
});
}
}
}
search_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
Response {
search_results,
featured_snippet,
answer,
infobox,
config,
}
}
pub fn merge_autocomplete_responses(
config: &Config,
responses: HashMap<Engine, Vec<String>>,
) -> Vec<String> {
let mut autocomplete_results: Vec<AutocompleteResult> = Vec::new();
for (engine, response) in responses {
let engine_config = config.engines.get(engine);
for (result_index, autocomplete_result) in response.into_iter().enumerate() {
// position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a
// score of 0.33, etc.
let base_result_score = 1. / (result_index + 1) as f64;
let result_score = base_result_score * engine_config.weight;
if let Some(existing_result) = autocomplete_results
.iter_mut()
.find(|r| r.query == autocomplete_result)
{
existing_result.score += result_score;
} else {
autocomplete_results.push(AutocompleteResult {
query: autocomplete_result,
score: result_score,
});
}
}
}
autocomplete_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
autocomplete_results.into_iter().map(|r| r.query).collect()
}
pub fn merge_images_responses(
config: Arc<Config>,
responses: HashMap<Engine, EngineImagesResponse>,
) -> ImagesResponse {
let mut image_results: Vec<SearchResult<EngineImageResult>> = Vec::new();
for (engine, response) in responses {
let engine_config = config.engines.get(engine);
for (result_index, image_result) in response.image_results.into_iter().enumerate() {
// position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a
// score of 0.33, etc.
let base_result_score = 1. / (result_index + 1) as f64;
let result_score = base_result_score * engine_config.weight;
if let Some(existing_result) = image_results
.iter_mut()
.find(|r| r.result.image_url == image_result.image_url)
{
// if the weight of this engine is higher than every other one then replace the
// title and page url
if engine_config.weight
> existing_result
.engines
.iter()
.map(|&other_engine| {
let other_engine_config = config.engines.get(other_engine);
other_engine_config.weight
})
.max_by(|a, b| a.partial_cmp(b).unwrap())
.unwrap_or(0.)
{
existing_result.result.title = image_result.title;
existing_result.result.page_url = image_result.page_url;
}
existing_result.engines.insert(engine);
existing_result.score += result_score;
} else {
image_results.push(SearchResult {
result: image_result,
engines: [engine].iter().copied().collect(),
score: result_score,
});
}
}
}
image_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
ImagesResponse {
image_results,
config,
}
}
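All three merge functions above share the same reciprocal-rank scheme: the result at (1-based) position `i` from an engine with weight `w` contributes `w / i`, and a result found by several engines accumulates their contributions. A worked example using the default weights from `config-base.toml` (google 1.05, bing 1.0):

```rust
// An image at position 1 on google and position 2 on bing:
let google_score = 1.05 * (1.0 / 1.0); // position 1 => base score 1
let bing_score = 1.0 * (1.0 / 2.0);    // position 2 => base score 0.5
let merged = google_score + bing_score;
assert!((merged - 1.55).abs() < 1e-9);
// google also has the higher weight of the two engines, so its
// title and page_url are the ones kept for the merged result.
```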

src/engines/search/bing.rs

@@ -1,9 +1,11 @@
 use base64::Engine;
-use scraper::{ElementRef, Selector};
+use eyre::eyre;
+use scraper::{ElementRef, Html, Selector};
+use tracing::warn;
 use url::Url;
 
 use crate::{
-    engines::{EngineResponse, CLIENT},
+    engines::{EngineImageResult, EngineImagesResponse, EngineResponse, CLIENT},
     parse::{parse_html_response_with_opts, ParseOpts, QueryMethod},
 };
@@ -64,6 +66,89 @@ pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
     )
 }
pub fn request_images(query: &str) -> reqwest::RequestBuilder {
CLIENT.get(
Url::parse_with_params(
"https://www.bing.com/images/async",
&[
("q", query),
("async", "content"),
("first", "1"),
("count", "35"),
],
)
.unwrap(),
)
}
#[tracing::instrument]
pub fn parse_images_response(body: &str) -> eyre::Result<EngineImagesResponse> {
let dom = Html::parse_document(body);
let mut image_results = Vec::new();
let image_container_el_sel = Selector::parse(".imgpt").unwrap();
let image_el_sel = Selector::parse(".iusc").unwrap();
for image_container_el in dom.select(&image_container_el_sel) {
let image_el = image_container_el
.select(&image_el_sel)
.next()
.ok_or_else(|| eyre!("no image element found"))?;
// parse the "m" attribute as json
let Some(data) = image_el.value().attr("m") else {
// this is normal, i think
continue;
};
let data = serde_json::from_str::<serde_json::Value>(data)?;
let page_url = data
.get("purl")
.and_then(|v| v.as_str())
.unwrap_or_default();
let image_url = data
// short for media url, probably
.get("murl")
.and_then(|v| v.as_str())
.unwrap_or_default();
let page_title = data
.get("t")
.and_then(|v| v.as_str())
.unwrap_or_default()
// bing adds these unicode characters around matches
.replace('\u{e000}', "")
.replace('\u{e001}', "");
// the text looks like "1200 x 1600 · jpegWikipedia"
// (the last part is incorrectly parsed since the actual text is inside another
// element but this is already good enough for our purposes)
let text = image_container_el.text().collect::<String>();
let width_height: Vec<u64> = text
.split(" · ")
.next()
.unwrap_or_default()
.split(" x ")
.map(|s| s.parse().unwrap_or_default())
.collect();
let (width, height) = match width_height.as_slice() {
[width, height] => (*width, *height),
_ => {
warn!("couldn't get width and height from text \"{text}\"");
continue;
}
};
image_results.push(EngineImageResult {
page_url: page_url.to_string(),
image_url: image_url.to_string(),
title: page_title.to_string(),
width,
height,
});
}
Ok(EngineImagesResponse { image_results })
}
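For context, the `m` attribute that this parser reads is a JSON blob; a hypothetical example with the fields used above (`purl`, `murl`, `t`):

```rust
// Hypothetical `m` attribute payload; field names match the parser above.
let m = r#"{"purl":"https://example.com/cats","murl":"https://example.com/cat.jpg","t":"A cat"}"#;
let data: serde_json::Value = serde_json::from_str(m).unwrap();
assert_eq!(data.get("murl").and_then(|v| v.as_str()), Some("https://example.com/cat.jpg"));
assert_eq!(data.get("purl").and_then(|v| v.as_str()), Some("https://example.com/cats"));
assert_eq!(data.get("t").and_then(|v| v.as_str()), Some("A cat"));
```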
 fn clean_url(url: &str) -> eyre::Result<String> {
     // clean up bing's tracking urls
     if url.starts_with("https://www.bing.com/ck/a?") {

src/engines/search/google.rs

@@ -1,8 +1,10 @@
+use eyre::eyre;
 use scraper::{ElementRef, Selector};
+use tracing::warn;
 use url::Url;
 
 use crate::{
-    engines::{EngineResponse, CLIENT},
+    engines::{EngineImageResult, EngineImagesResponse, EngineResponse, CLIENT},
     parse::{parse_html_response_with_opts, ParseOpts, QueryMethod},
 };
@@ -10,8 +12,11 @@ pub fn request(query: &str) -> reqwest::RequestBuilder {
     CLIENT.get(
         Url::parse_with_params(
             "https://www.google.com/search",
-            // nfpr makes it not try to autocorrect
-            &[("q", query), ("nfpr", "1")],
+            &[
+                ("q", query),
+                // nfpr makes it not try to autocorrect
+                ("nfpr", "1"),
+            ],
         )
         .unwrap(),
     )
@@ -112,6 +117,92 @@ pub fn parse_autocomplete_response(body: &str) -> eyre::Result<Vec<String>> {
     .collect())
 }
pub fn request_images(query: &str) -> reqwest::RequestBuilder {
// ok so google also has a json api for images BUT it gives us less results
CLIENT.get(
Url::parse_with_params(
"https://www.google.com/search",
&[("q", query), ("udm", "2"), ("prmd", "ivsnmbtz")],
)
.unwrap(),
)
}
pub fn parse_images_response(body: &str) -> eyre::Result<EngineImagesResponse> {
// we can't just scrape the html because it won't give us the image sources,
// so... we have to scrape their internal json
// iterate through every script until we find something that matches our regex
let internal_json_regex =
regex::Regex::new(r#"(?:\(function\(\)\{google\.jl=\{.+?)var \w=(\{".+?\});"#)?;
let mut internal_json = None;
let dom = scraper::Html::parse_document(body);
for script in dom.select(&Selector::parse("script").unwrap()) {
let script = script.inner_html();
if let Some(captures) = internal_json_regex.captures(&script).and_then(|c| c.get(1)) {
internal_json = Some(captures.as_str().to_string());
break;
}
}
let internal_json =
internal_json.ok_or_else(|| eyre!("couldn't get internal json for google images"))?;
let internal_json: serde_json::Map<String, serde_json::Value> =
serde_json::from_str(&internal_json)?;
let mut image_results = Vec::new();
for element_json in internal_json.values() {
// the internal json uses arrays instead of maps, which makes it kinda hard to
// use and also probably pretty unstable
let Some(element_json) = element_json
.as_array()
.and_then(|a| a.get(1))
.and_then(|v| v.as_array())
else {
continue;
};
let Some((image_url, width, height)) = element_json
.get(3)
.and_then(|v| serde_json::from_value(v.clone()).ok())
else {
warn!("couldn't get image data from google images json");
continue;
};
// this is probably pretty brittle, hopefully google doesn't break it any time
// soon
let Some(page) = element_json
.get(9)
.and_then(|v| v.as_object())
.and_then(|o| o.get("2003"))
.and_then(|v| v.as_array())
else {
warn!("couldn't get page data from google images json");
continue;
};
let Some(page_url) = page.get(2).and_then(|v| v.as_str()).map(|s| s.to_string()) else {
warn!("couldn't get page url from google images json");
continue;
};
let Some(title) = page.get(3).and_then(|v| v.as_str()).map(|s| s.to_string()) else {
warn!("couldn't get page title from google images json");
continue;
};
image_results.push(EngineImageResult {
image_url,
page_url,
title,
width,
height,
});
}
Ok(EngineImagesResponse { image_results })
}
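The index-based access above is easiest to follow against a concrete entry. A hypothetical value shaped the way the parser assumes, namely `entry[1][3] = [image_url, width, height]` and `entry[1][9]["2003"]` holding the page url and title at indices 2 and 3:

```rust
// Hypothetical entry shaped like the internal JSON the parser walks.
let entry = serde_json::json!([
    null,
    [
        null, null, null,
        ["https://example.com/cat.jpg", 1600, 1200], // index 3: url, width, height
        null, null, null, null, null,
        { "2003": [null, null, "https://example.com/cats", "Cats - Example"] } // index 9
    ]
]);
let inner = entry.as_array().unwrap()[1].as_array().unwrap().clone();
let (image_url, width, height): (String, u64, u64) =
    serde_json::from_value(inner[3].clone()).unwrap();
assert_eq!((image_url.as_str(), width, height), ("https://example.com/cat.jpg", 1600, 1200));
```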
 fn clean_url(url: &str) -> eyre::Result<String> {
     if url.starts_with("/url?q=") {
         // get the q param

style.css

@@ -58,6 +58,13 @@ main {
     background-color: var(--bg-2);
     min-height: 100%;
 }
.search-images > main {
/* image search uses 100% width */
max-width: 100%;
}
.results-container.search-images {
max-width: none;
}
 @media screen and (max-width: 74rem) {
     /* small screens */
     .results-container {
@@ -145,6 +152,21 @@ h1 {
     background: var(--bg-4);
 }
/* search tabs (like images, if enabled) */
.search-tabs {
display: flex;
gap: 0.5rem;
margin-bottom: 0.5rem;
margin-top: -0.5rem;
}
.search-tab {
border: 1px solid var(--bg-4);
padding: 0.25rem;
}
a.search-tab {
color: var(--link);
}
 /* search result */
 .search-result {
     padding-top: 1rem;
@@ -298,7 +320,7 @@ h3.answer-thesaurus-category-title {
     text-decoration: underline;
 }
 .answer-notepad {
-    width: calc( 100% - 4px );
+    width: calc(100% - 4px);
     height: fit-content;
     overflow-y: show;
     background-color: transparent;
@@ -373,9 +395,56 @@ h3.answer-thesaurus-category-title {
 .infobox-minecraft_wiki-article > .notaninfobox {
     display: none !important;
 }
-.noexcerpt, .navigation-not-searchable {
+.noexcerpt,
+.navigation-not-searchable {
     display: none !important;
 }
 .mcw-mainpage-icon {
     display: inline-block;
 }
/* image results */
.image-results {
display: flex;
flex-wrap: wrap;
gap: 0.5rem;
}
.image-result {
min-width: 12rem;
position: relative;
}
.image-result-img-container {
margin: 0 auto;
width: fit-content;
}
.image-result img {
height: 10.3rem;
width: auto;
}
.image-result-page-anchor {
display: block;
height: 2em;
}
.image-result-page-url {
overflow: hidden;
text-overflow: ellipsis;
font-size: 0.8rem;
white-space: nowrap;
width: 100%;
position: absolute;
display: block;
}
.image-result-title {
overflow: hidden;
text-overflow: ellipsis;
font-size: 0.85rem;
white-space: nowrap;
width: 100%;
position: absolute;
display: block;
margin-top: 1em;
}

src/web/image_proxy.rs (new file)

@@ -0,0 +1,73 @@
use std::{collections::HashMap, sync::Arc};
use axum::{
extract::{Query, State},
http::StatusCode,
response::{IntoResponse, Response},
};
use tracing::error;
use crate::{config::Config, engines};
pub async fn route(
Query(params): Query<HashMap<String, String>>,
State(config): State<Arc<Config>>,
) -> Response {
let proxy_config = &config.image_search.proxy;
if !proxy_config.enabled.unwrap() {
return (StatusCode::FORBIDDEN, "Image proxy is disabled").into_response();
};
let url = params.get("url").cloned().unwrap_or_default();
if url.is_empty() {
return (StatusCode::BAD_REQUEST, "Missing `url` parameter").into_response();
}
let mut res = match engines::CLIENT
.get(&url)
.header("accept", "image/*")
.send()
.await
{
Ok(res) => res,
Err(err) => {
error!("Image proxy error for {url}: {err}");
return (StatusCode::INTERNAL_SERVER_ERROR, "Image proxy error").into_response();
}
};
let max_size = proxy_config.max_download_size.unwrap();
if res.content_length().unwrap_or_default() > max_size {
return (StatusCode::PAYLOAD_TOO_LARGE, "Image too large").into_response();
}
// validate content-type
let content_type = res
.headers()
.get(reqwest::header::CONTENT_TYPE)
.and_then(|v| v.to_str().ok())
.unwrap_or_default()
.to_string();
if !content_type.starts_with("image/") {
return (StatusCode::BAD_REQUEST, "Not an image").into_response();
}
let mut image_bytes = Vec::new();
while let Ok(Some(chunk)) = res.chunk().await {
image_bytes.extend_from_slice(&chunk);
if image_bytes.len() as u64 > max_size {
return (StatusCode::PAYLOAD_TOO_LARGE, "Image too large").into_response();
}
}
(
[
(axum::http::header::CONTENT_TYPE, content_type),
(
axum::http::header::CACHE_CONTROL,
"public, max-age=31536000".to_owned(),
),
],
image_bytes,
)
.into_response()
}
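Two details worth noting in the handler above: it checks `Content-Length` up front but also re-checks the running byte count while streaming chunks, so a server that lies about (or omits) the header still cannot exceed `max_download_size`. Client-side, a proxied URL is built by percent-encoding the original image URL; a sketch matching what `images.rs` does further below, with example.com as a placeholder host:

```rust
// Sketch of building a proxied image URL, as images.rs does below.
let original = "https://example.com/cat.jpg"; // hypothetical image
let escaped =
    url::form_urlencoded::byte_serialize(original.as_bytes()).collect::<String>();
assert_eq!(
    format!("/image-proxy?url={escaped}"),
    "/image-proxy?url=https%3A%2F%2Fexample.com%2Fcat.jpg"
);
```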

src/web/mod.rs

@@ -1,4 +1,5 @@
 pub mod autocomplete;
+mod image_proxy;
 pub mod index;
 pub mod opensearch;
 pub mod search;

@@ -45,6 +46,7 @@ pub async fn run(config: Config) {
         .route("/opensearch.xml", get(opensearch::route))
         .route("/search", get(search::route))
         .route("/autocomplete", get(autocomplete::route))
+        .route("/image-proxy", get(image_proxy::route))
         .with_state(Arc::new(config));
 
     info!("Listening on http://{bind_addr}");

src/web/search.rs

@@ -1,4 +1,7 @@
-use std::{collections::HashMap, net::SocketAddr, sync::Arc};
+mod all;
+mod images;
+
+use std::{collections::HashMap, net::SocketAddr, str::FromStr, sync::Arc};
 
 use async_stream::stream;
 use axum::{
@@ -8,142 +11,68 @@ use axum::{
     response::IntoResponse,
 };
 use bytes::Bytes;
-use maud::{html, PreEscaped};
+use maud::{html, PreEscaped, DOCTYPE};
 
 use crate::{
     config::Config,
-    engines::{self, Engine, EngineProgressUpdate, ProgressUpdateData, Response, SearchQuery},
+    engines::{
+        self, Engine, EngineProgressUpdate, ProgressUpdateData, ResponseForTab, SearchQuery,
+        SearchTab,
+    },
 };
 
-fn render_beginning_of_html(query: &str) -> String {
+fn render_beginning_of_html(search: &SearchQuery) -> String {
     let head_html = html! {
         head {
             meta charset="UTF-8";
             meta name="viewport" content="width=device-width, initial-scale=1.0";
             title {
-                (query)
+                (search.query)
                 " - metasearch"
             }
             link rel="stylesheet" href="/style.css";
             script src="/script.js" defer {}
             link rel="search" type="application/opensearchdescription+xml" title="metasearch" href="/opensearch.xml";
         }
-    }.into_string();
+    };
     let form_html = html! {
         form."search-form" action="/search" method="get" {
-            input #"search-input" type="text" name="q" placeholder="Search" value=(query) autofocus onfocus="this.select()" autocomplete="off";
+            input #"search-input" type="text" name="q" placeholder="Search" value=(search.query) autofocus onfocus="this.select()" autocomplete="off";
             input type="submit" value="Search";
         }
-    }.into_string();
+        @if search.config.image_search.enabled.unwrap() {
+            div.search-tabs {
+                @if search.tab == SearchTab::All { span.search-tab.selected { "All" } }
+                @else { a.search-tab href={ "?q=" (search.query) } { "All" } }
+                @if search.tab == SearchTab::Images { span.search-tab.selected { "Images" } }
+                @else { a.search-tab href={ "?q=" (search.query) "&tab=images" } { "Images" } }
+            }
+        }
+    };
 
-    format!(
-        r#"<!DOCTYPE html>
-<html lang="en">
-{head_html}
-<body>
-    <div class="results-container">
-        <main>
-            {form_html}
-            <div class="progress-updates">
-"#
-    )
+    // we don't close the elements here because we do chunked responses
+    html! {
+        (DOCTYPE)
+        html lang="en";
+        (head_html)
+        body;
+        div.results-container.{"search-" (search.tab.to_string())};
+        main;
+        (form_html)
+        div.progress-updates;
+    }
+    .into_string()
 }
 
 fn render_end_of_html() -> String {
     r"</main></div></body></html>".to_string()
 }
 
-fn render_engine_list(engines: &[engines::Engine], config: &Config) -> PreEscaped<String> {
-    // ...body re-added below as a `pub fn` so the new all/images
-    // submodules can use it...
-}
-
-fn render_search_result(result: &engines::SearchResult, config: &Config) -> PreEscaped<String> {
-    // ...moved to src/web/search/all.rs (shown below)...
-}
-
-fn render_featured_snippet(
-    featured_snippet: &engines::FeaturedSnippet,
-    config: &Config,
-) -> PreEscaped<String> {
-    // ...moved to src/web/search/all.rs (shown below)...
-}
-
-fn render_results(response: Response) -> PreEscaped<String> {
-    // ...moved to src/web/search/all.rs (shown below)...
-}
+fn render_results_for_tab(response: ResponseForTab) -> PreEscaped<String> {
+    match response {
+        ResponseForTab::All(r) => all::render_results(r),
+        ResponseForTab::Images(r) => images::render_results(r),
+    }
+}
 
 fn render_engine_progress_update(
@@ -173,6 +102,27 @@ fn render_engine_progress_update(
     .into_string()
 }
 
+pub fn render_engine_list(engines: &[engines::Engine], config: &Config) -> PreEscaped<String> {
+    let mut html = String::new();
+    for (i, engine) in engines.iter().enumerate() {
+        if config.ui.show_engine_list_separator.unwrap() && i > 0 {
+            html.push_str(" &middot; ");
+        }
+        let raw_engine_id = &engine.id();
+        let engine_id = if config.ui.show_engine_list_separator.unwrap() {
+            raw_engine_id.replace('_', " ")
+        } else {
+            raw_engine_id.to_string()
+        };
+        html.push_str(&html! { span.engine-list-item { (engine_id) } }.into_string())
+    }
+    html! {
+        div.engine-list {
+            (PreEscaped(html))
+        }
+    }
+}
+
 pub async fn route(
     Query(params): Query<HashMap<String, String>>,
     State(config): State<Arc<Config>>,
@@ -197,8 +147,14 @@ pub async fn route(
         );
     }
 
+    let search_tab = params
+        .get("tab")
+        .and_then(|t| SearchTab::from_str(t).ok())
+        .unwrap_or_default();
+
     let query = SearchQuery {
         query,
+        tab: search_tab,
         request_headers: headers
             .clone()
             .into_iter()
@@ -253,16 +209,11 @@ pub async fn route(
                     second_part.push_str("</div>"); // close progress-updates
                     second_part.push_str("<style>.progress-updates{display:none}</style>");
-                    second_part.push_str(&render_results(results).into_string());
+                    second_part.push_str(&render_results_for_tab(results).into_string());
                     yield Ok(Bytes::from(second_part));
                 },
                 ProgressUpdateData::PostSearchInfobox(infobox) => {
-                    third_part.push_str(&html! {
-                        div."infobox"."postsearch-infobox" {
-                            (infobox.html)
-                            (render_engine_list(&[infobox.engine], &config))
-                        }
-                    }.into_string());
+                    third_part.push_str(&all::render_infobox(&infobox, &config).into_string());
                 }
             }
         }

src/web/search/all.rs (new file)

@@ -0,0 +1,93 @@
//! Rendering results in the "all" tab.
use maud::{html, PreEscaped};
use crate::{
config::Config,
engines::{self, EngineSearchResult, Infobox, Response},
web::search::render_engine_list,
};
pub fn render_results(response: Response) -> PreEscaped<String> {
let mut html = String::new();
if let Some(infobox) = &response.infobox {
html.push_str(
&html! {
div."infobox" {
(infobox.html)
(render_engine_list(&[infobox.engine], &response.config))
}
}
.into_string(),
);
}
if let Some(answer) = &response.answer {
html.push_str(
&html! {
div."answer" {
(answer.html)
(render_engine_list(&[answer.engine], &response.config))
}
}
.into_string(),
);
}
if let Some(featured_snippet) = &response.featured_snippet {
html.push_str(&render_featured_snippet(featured_snippet, &response.config).into_string());
}
for result in &response.search_results {
html.push_str(&render_search_result(result, &response.config).into_string());
}
if html.is_empty() {
html.push_str(
&html! {
p { "No results." }
}
.into_string(),
);
}
PreEscaped(html)
}
fn render_search_result(
result: &engines::SearchResult<EngineSearchResult>,
config: &Config,
) -> PreEscaped<String> {
html! {
div."search-result" {
a."search-result-anchor" rel="noreferrer" href=(result.result.url) {
span."search-result-url" { (result.result.url) }
h3."search-result-title" { (result.result.title) }
}
p."search-result-description" { (result.result.description) }
(render_engine_list(&result.engines.iter().copied().collect::<Vec<_>>(), config))
}
}
}
fn render_featured_snippet(
featured_snippet: &engines::FeaturedSnippet,
config: &Config,
) -> PreEscaped<String> {
html! {
div."featured-snippet" {
p."search-result-description" { (featured_snippet.description) }
a."search-result-anchor" rel="noreferrer" href=(featured_snippet.url) {
span."search-result-url" { (featured_snippet.url) }
h3."search-result-title" { (featured_snippet.title) }
}
(render_engine_list(&[featured_snippet.engine], config))
}
}
}
pub fn render_infobox(infobox: &Infobox, config: &Config) -> PreEscaped<String> {
html! {
div."infobox"."postsearch-infobox" {
(infobox.html)
(render_engine_list(&[infobox.engine], &config))
}
}
}

src/web/search/images.rs (new file)

@@ -0,0 +1,48 @@
use maud::{html, PreEscaped};
use crate::{
config::Config,
engines::{self, EngineImageResult, ImagesResponse},
web::search::render_engine_list,
};
pub fn render_results(response: ImagesResponse) -> PreEscaped<String> {
html! {
div.image-results {
@for image in &response.image_results {
(render_image_result(image, &response.config))
}
}
}
}
fn render_image_result(
result: &engines::SearchResult<EngineImageResult>,
config: &Config,
) -> PreEscaped<String> {
let original_image_src = &result.result.image_url;
let image_src = if config.image_search.proxy.enabled.unwrap() {
// serialize url params
let escaped_param =
url::form_urlencoded::byte_serialize(original_image_src.as_bytes()).collect::<String>();
format!("/image-proxy?url={}", escaped_param)
} else {
original_image_src.to_string()
};
html! {
div.image-result {
a.image-result-anchor rel="noreferrer" href=(original_image_src) target="_blank" {
div.image-result-img-container {
img loading="lazy" src=(image_src) width=(result.result.width) height=(result.result.height);
}
}
a.image-result-page-anchor href=(result.result.page_url) {
span.image-result-page-url.search-result-url { (result.result.page_url) }
span.image-result-title { (result.result.title) }
}
@if config.image_search.show_engines.unwrap() {
{(render_engine_list(&result.engines.iter().copied().collect::<Vec<_>>(), &config))}
}
}
}
}