Image search (#10)
* add tab for image search
* add basic implementation of image search
* add proxy
* fix google images regex breaking when the query has spaces
* fix sizing of image elements while they're loading
* add optional engines indicator to image search
* add bing images
* fix some parsing issues with bing images
* fix bing titles
This commit is contained in:
parent
878510bcb2
commit
ef65e60f9f
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -1841,6 +1841,7 @@ version = "1.0.114"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0"
|
checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"indexmap",
|
||||||
"itoa",
|
"itoa",
|
||||||
"ryu",
|
"ryu",
|
||||||
"serde",
|
"serde",
|
||||||
|
10
Cargo.toml
10
Cargo.toml
@ -28,15 +28,11 @@ numbat = "1.11.0"
|
|||||||
once_cell = "1.19.0"
|
once_cell = "1.19.0"
|
||||||
rand = "0.8.5"
|
rand = "0.8.5"
|
||||||
regex = "1.10.3"
|
regex = "1.10.3"
|
||||||
reqwest = { version = "0.11.26", default-features = false, features = [
|
reqwest = { version = "0.11.26", default-features = false, features = ["rustls-tls", "gzip", "deflate", "brotli"] }
|
||||||
"rustls-tls",
|
|
||||||
"gzip",
|
|
||||||
"deflate",
|
|
||||||
"brotli",
|
|
||||||
] }
|
|
||||||
scraper = "0.19.0"
|
scraper = "0.19.0"
|
||||||
serde = { version = "1.0.197", features = ["derive"] }
|
serde = { version = "1.0.197", features = ["derive"] }
|
||||||
serde_json = "1.0.114"
|
# preserve_order is needed for google images. yippee!
|
||||||
|
serde_json = { version = "1.0.114", features = ["preserve_order"] }
|
||||||
tokio = { version = "1.36.0", features = ["rt", "macros"] }
|
tokio = { version = "1.36.0", features = ["rt", "macros"] }
|
||||||
tokio-stream = "0.1.15"
|
tokio-stream = "0.1.15"
|
||||||
toml = { version = "0.8.12", default-features = false, features = ["parse"] }
|
toml = { version = "0.8.12", default-features = false, features = ["parse"] }
|
||||||
|
@ -6,6 +6,11 @@ bind = "0.0.0.0:28019"
|
|||||||
show_engine_list_separator = false
|
show_engine_list_separator = false
|
||||||
show_version_info = false
|
show_version_info = false
|
||||||
|
|
||||||
|
[image_search]
|
||||||
|
enabled = false
|
||||||
|
show_engines = true
|
||||||
|
proxy = { enabled = true, max_download_size = 10_000_000 }
|
||||||
|
|
||||||
[engines]
|
[engines]
|
||||||
google = { weight = 1.05 }
|
google = { weight = 1.05 }
|
||||||
bing = { weight = 1.0 }
|
bing = { weight = 1.0 }
|
||||||
|
114
src/config.rs
114
src/config.rs
@ -12,6 +12,8 @@ pub struct Config {
|
|||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub ui: UiConfig,
|
pub ui: UiConfig,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
|
pub image_search: ImageSearchConfig,
|
||||||
|
#[serde(default)]
|
||||||
pub engines: EnginesConfig,
|
pub engines: EnginesConfig,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -23,12 +25,51 @@ pub struct UiConfig {
|
|||||||
pub show_version_info: Option<bool>,
|
pub show_version_info: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug, Default)]
|
||||||
|
pub struct ImageSearchConfig {
|
||||||
|
pub enabled: Option<bool>,
|
||||||
|
pub show_engines: Option<bool>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub proxy: ImageProxyConfig,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug, Default)]
|
||||||
|
pub struct ImageProxyConfig {
|
||||||
|
/// Whether we should proxy remote images through our server. This is mostly
|
||||||
|
/// a privacy feature.
|
||||||
|
pub enabled: Option<bool>,
|
||||||
|
/// The maximum size of an image that can be proxied. This is in bytes.
|
||||||
|
pub max_download_size: Option<u64>,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Deserialize, Debug, Default)]
|
#[derive(Deserialize, Debug, Default)]
|
||||||
pub struct EnginesConfig {
|
pub struct EnginesConfig {
|
||||||
#[serde(flatten)]
|
#[serde(flatten)]
|
||||||
pub map: HashMap<Engine, DefaultableEngineConfig>,
|
pub map: HashMap<Engine, DefaultableEngineConfig>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Clone, Debug)]
|
||||||
|
#[serde(untagged)]
|
||||||
|
pub enum DefaultableEngineConfig {
|
||||||
|
Boolean(bool),
|
||||||
|
Full(FullEngineConfig),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Clone, Debug)]
|
||||||
|
pub struct FullEngineConfig {
|
||||||
|
#[serde(default = "fn_true")]
|
||||||
|
pub enabled: bool,
|
||||||
|
|
||||||
|
/// The priority of this engine relative to the other engines. The default
|
||||||
|
/// is 1, and a value of 0 is treated as the default.
|
||||||
|
#[serde(default)]
|
||||||
|
pub weight: f64,
|
||||||
|
/// Per-engine configs. These are parsed at request time.
|
||||||
|
#[serde(flatten)]
|
||||||
|
#[serde(default)]
|
||||||
|
pub extra: toml::Table,
|
||||||
|
}
|
||||||
|
|
||||||
impl Config {
|
impl Config {
|
||||||
pub fn read_or_create(config_path: &Path) -> eyre::Result<Self> {
|
pub fn read_or_create(config_path: &Path) -> eyre::Result<Self> {
|
||||||
let base_config_str = include_str!("../config-base.toml");
|
let base_config_str = include_str!("../config-base.toml");
|
||||||
@ -50,20 +91,39 @@ impl Config {
|
|||||||
// use the default for something.
|
// use the default for something.
|
||||||
pub fn update(&mut self, new: Config) {
|
pub fn update(&mut self, new: Config) {
|
||||||
self.bind = new.bind;
|
self.bind = new.bind;
|
||||||
self.ui.show_engine_list_separator = new
|
self.ui.update(new.ui);
|
||||||
.ui
|
self.image_search.update(new.image_search);
|
||||||
|
self.engines.update(new.engines);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl UiConfig {
|
||||||
|
pub fn update(&mut self, new: UiConfig) {
|
||||||
|
self.show_engine_list_separator = new
|
||||||
.show_engine_list_separator
|
.show_engine_list_separator
|
||||||
.or(self.ui.show_engine_list_separator);
|
.or(self.show_engine_list_separator);
|
||||||
assert_ne!(self.ui.show_engine_list_separator, None);
|
assert_ne!(self.show_engine_list_separator, None);
|
||||||
self.ui.show_version_info = new.ui.show_version_info.or(self.ui.show_version_info);
|
self.show_version_info = new.show_version_info.or(self.show_version_info);
|
||||||
assert_ne!(self.ui.show_version_info, None);
|
assert_ne!(self.show_version_info, None);
|
||||||
for (key, new) in new.engines.map {
|
|
||||||
if let Some(existing) = self.engines.map.get_mut(&key) {
|
|
||||||
existing.update(new);
|
|
||||||
} else {
|
|
||||||
self.engines.map.insert(key, new);
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ImageSearchConfig {
|
||||||
|
pub fn update(&mut self, new: ImageSearchConfig) {
|
||||||
|
self.enabled = new.enabled.or(self.enabled);
|
||||||
|
assert_ne!(self.enabled, None);
|
||||||
|
self.show_engines = new.show_engines.or(self.show_engines);
|
||||||
|
assert_ne!(self.show_engines, None);
|
||||||
|
self.proxy.update(new.proxy);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ImageProxyConfig {
|
||||||
|
pub fn update(&mut self, new: ImageProxyConfig) {
|
||||||
|
self.enabled = new.enabled.or(self.enabled);
|
||||||
|
assert_ne!(self.enabled, None);
|
||||||
|
self.max_download_size = new.max_download_size.or(self.max_download_size);
|
||||||
|
assert_ne!(self.max_download_size, None);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -91,13 +151,16 @@ impl EnginesConfig {
|
|||||||
None => &DEFAULT_ENABLED_FULL_ENGINE_CONFIG,
|
None => &DEFAULT_ENABLED_FULL_ENGINE_CONFIG,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Deserialize, Clone, Debug)]
|
pub fn update(&mut self, new: Self) {
|
||||||
#[serde(untagged)]
|
for (key, new) in new.map {
|
||||||
pub enum DefaultableEngineConfig {
|
if let Some(existing) = self.map.get_mut(&key) {
|
||||||
Boolean(bool),
|
existing.update(new);
|
||||||
Full(FullEngineConfig),
|
} else {
|
||||||
|
self.map.insert(key, new);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DefaultableEngineConfig {
|
impl DefaultableEngineConfig {
|
||||||
@ -115,24 +178,9 @@ impl Default for DefaultableEngineConfig {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Deserialize, Clone, Debug)]
|
|
||||||
pub struct FullEngineConfig {
|
|
||||||
#[serde(default = "default_true")]
|
|
||||||
pub enabled: bool,
|
|
||||||
|
|
||||||
/// The priority of this engine relative to the other engines. The default
|
|
||||||
/// is 1, and a value of 0 is treated as the default.
|
|
||||||
#[serde(default)]
|
|
||||||
pub weight: f64,
|
|
||||||
/// Per-engine configs. These are parsed at request time.
|
|
||||||
#[serde(flatten)]
|
|
||||||
#[serde(default)]
|
|
||||||
pub extra: toml::Table,
|
|
||||||
}
|
|
||||||
|
|
||||||
// serde expects a function as the default, this just exists so "enabled" is
|
// serde expects a function as the default, this just exists so "enabled" is
|
||||||
// always true by default
|
// always true by default
|
||||||
fn default_true() -> bool {
|
fn fn_true() -> bool {
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -126,3 +126,31 @@ macro_rules! engine_postsearch_requests {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[macro_export]
|
||||||
|
macro_rules! engine_image_requests {
|
||||||
|
($($engine:ident => $module:ident::$engine_id:ident::$request:ident, $parse_response:ident),* $(,)?) => {
|
||||||
|
impl Engine {
|
||||||
|
#[must_use]
|
||||||
|
pub fn request_images(&self, query: &SearchQuery) -> RequestResponse {
|
||||||
|
match self {
|
||||||
|
$(
|
||||||
|
Engine::$engine => $module::$engine_id::$request(query).into(),
|
||||||
|
)*
|
||||||
|
_ => RequestResponse::None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse_images_response(&self, res: &HttpResponse) -> eyre::Result<EngineImagesResponse> {
|
||||||
|
#[allow(clippy::useless_conversion)]
|
||||||
|
match self {
|
||||||
|
$(
|
||||||
|
Engine::$engine => $crate::engine_parse_response! { res, $module::$engine_id::$parse_response }
|
||||||
|
.ok_or_else(|| eyre::eyre!("engine {self:?} can't parse images response"))?,
|
||||||
|
)*
|
||||||
|
_ => eyre::bail!("engine {self:?} can't parse response"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
@ -1,25 +1,26 @@
|
|||||||
use std::{
|
use std::{
|
||||||
collections::{BTreeSet, HashMap},
|
collections::{BTreeSet, HashMap},
|
||||||
fmt,
|
fmt::{self, Display},
|
||||||
net::IpAddr,
|
net::IpAddr,
|
||||||
ops::Deref,
|
ops::Deref,
|
||||||
str::FromStr,
|
str::FromStr,
|
||||||
sync::Arc,
|
sync::Arc,
|
||||||
time::Instant,
|
time::{Duration, Instant},
|
||||||
};
|
};
|
||||||
|
|
||||||
use futures::future::join_all;
|
use futures::future::join_all;
|
||||||
use maud::PreEscaped;
|
use maud::PreEscaped;
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use reqwest::header::HeaderMap;
|
use reqwest::{header::HeaderMap, RequestBuilder};
|
||||||
use serde::{Deserialize, Deserializer};
|
use serde::{Deserialize, Deserializer};
|
||||||
use tokio::sync::mpsc;
|
use tokio::sync::mpsc;
|
||||||
use tracing::{error, info};
|
use tracing::{error, info};
|
||||||
|
|
||||||
mod macros;
|
mod macros;
|
||||||
|
mod ranking;
|
||||||
use crate::{
|
use crate::{
|
||||||
config::Config, engine_autocomplete_requests, engine_postsearch_requests, engine_requests,
|
config::Config, engine_autocomplete_requests, engine_image_requests,
|
||||||
engines,
|
engine_postsearch_requests, engine_requests, engines,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub mod answer;
|
pub mod answer;
|
||||||
@ -90,6 +91,11 @@ engine_postsearch_requests! {
|
|||||||
StackExchange => postsearch::stackexchange::request, parse_response,
|
StackExchange => postsearch::stackexchange::request, parse_response,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
engine_image_requests! {
|
||||||
|
Google => search::google::request_images, parse_images_response,
|
||||||
|
Bing => search::bing::request_images, parse_images_response,
|
||||||
|
}
|
||||||
|
|
||||||
impl fmt::Display for Engine {
|
impl fmt::Display for Engine {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
write!(f, "{}", self.id())
|
write!(f, "{}", self.id())
|
||||||
@ -108,6 +114,7 @@ impl<'de> Deserialize<'de> for Engine {
|
|||||||
|
|
||||||
pub struct SearchQuery {
|
pub struct SearchQuery {
|
||||||
pub query: String,
|
pub query: String,
|
||||||
|
pub tab: SearchTab,
|
||||||
pub request_headers: HashMap<String, String>,
|
pub request_headers: HashMap<String, String>,
|
||||||
pub ip: String,
|
pub ip: String,
|
||||||
/// The config is part of the query so it's possible to make a query with a
|
/// The config is part of the query so it's possible to make a query with a
|
||||||
@ -123,6 +130,31 @@ impl Deref for SearchQuery {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
pub enum SearchTab {
|
||||||
|
#[default]
|
||||||
|
All,
|
||||||
|
Images,
|
||||||
|
}
|
||||||
|
impl FromStr for SearchTab {
|
||||||
|
type Err = ();
|
||||||
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
|
match s {
|
||||||
|
"all" => Ok(Self::All),
|
||||||
|
"images" => Ok(Self::Images),
|
||||||
|
_ => Err(()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl Display for SearchTab {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
Self::All => write!(f, "all"),
|
||||||
|
Self::Images => write!(f, "images"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub enum RequestResponse {
|
pub enum RequestResponse {
|
||||||
None,
|
None,
|
||||||
Http(reqwest::RequestBuilder),
|
Http(reqwest::RequestBuilder),
|
||||||
@ -172,7 +204,7 @@ impl From<HttpResponse> for reqwest::Response {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct EngineSearchResult {
|
pub struct EngineSearchResult {
|
||||||
pub url: String,
|
pub url: String,
|
||||||
pub title: String,
|
pub title: String,
|
||||||
@ -194,6 +226,11 @@ pub struct EngineResponse {
|
|||||||
pub infobox_html: Option<PreEscaped<String>>,
|
pub infobox_html: Option<PreEscaped<String>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct EngineImagesResponse {
|
||||||
|
pub image_results: Vec<EngineImageResult>,
|
||||||
|
}
|
||||||
|
|
||||||
impl EngineResponse {
|
impl EngineResponse {
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
@ -217,6 +254,22 @@ impl EngineResponse {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl EngineImagesResponse {
|
||||||
|
#[must_use]
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self::default()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct EngineImageResult {
|
||||||
|
pub image_url: String,
|
||||||
|
pub page_url: String,
|
||||||
|
pub title: String,
|
||||||
|
pub width: u64,
|
||||||
|
pub height: u64,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum EngineProgressUpdate {
|
pub enum EngineProgressUpdate {
|
||||||
Requesting,
|
Requesting,
|
||||||
@ -231,7 +284,7 @@ pub enum ProgressUpdateData {
|
|||||||
engine: Engine,
|
engine: Engine,
|
||||||
update: EngineProgressUpdate,
|
update: EngineProgressUpdate,
|
||||||
},
|
},
|
||||||
Response(Response),
|
Response(ResponseForTab),
|
||||||
PostSearchInfobox(Infobox),
|
PostSearchInfobox(Infobox),
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -251,17 +304,40 @@ impl ProgressUpdate {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tracing::instrument(fields(query = %query.query), skip(progress_tx))]
|
async fn make_request(
|
||||||
pub async fn search(
|
request: RequestBuilder,
|
||||||
|
engine: Engine,
|
||||||
query: &SearchQuery,
|
query: &SearchQuery,
|
||||||
progress_tx: mpsc::UnboundedSender<ProgressUpdate>,
|
send_engine_progress_update: impl Fn(Engine, EngineProgressUpdate),
|
||||||
|
) -> eyre::Result<HttpResponse> {
|
||||||
|
send_engine_progress_update(engine, EngineProgressUpdate::Requesting);
|
||||||
|
|
||||||
|
let mut res = request.send().await?;
|
||||||
|
|
||||||
|
send_engine_progress_update(engine, EngineProgressUpdate::Downloading);
|
||||||
|
|
||||||
|
let mut body_bytes = Vec::new();
|
||||||
|
while let Some(chunk) = res.chunk().await? {
|
||||||
|
body_bytes.extend_from_slice(&chunk);
|
||||||
|
}
|
||||||
|
let body = String::from_utf8_lossy(&body_bytes).to_string();
|
||||||
|
|
||||||
|
send_engine_progress_update(engine, EngineProgressUpdate::Parsing);
|
||||||
|
|
||||||
|
let http_response = HttpResponse {
|
||||||
|
res,
|
||||||
|
body,
|
||||||
|
config: query.config.clone(),
|
||||||
|
};
|
||||||
|
Ok(http_response)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn make_requests(
|
||||||
|
query: &SearchQuery,
|
||||||
|
progress_tx: &mpsc::UnboundedSender<ProgressUpdate>,
|
||||||
|
start_time: Instant,
|
||||||
|
send_engine_progress_update: &impl Fn(Engine, EngineProgressUpdate),
|
||||||
) -> eyre::Result<()> {
|
) -> eyre::Result<()> {
|
||||||
let start_time = Instant::now();
|
|
||||||
|
|
||||||
info!("Doing search");
|
|
||||||
|
|
||||||
let progress_tx = &progress_tx;
|
|
||||||
|
|
||||||
let mut requests = Vec::new();
|
let mut requests = Vec::new();
|
||||||
for &engine in Engine::all() {
|
for &engine in Engine::all() {
|
||||||
let engine_config = query.config.engines.get(engine);
|
let engine_config = query.config.engines.get(engine);
|
||||||
@ -274,59 +350,18 @@ pub async fn search(
|
|||||||
|
|
||||||
let response = match request_response {
|
let response = match request_response {
|
||||||
RequestResponse::Http(request) => {
|
RequestResponse::Http(request) => {
|
||||||
progress_tx.send(ProgressUpdate::new(
|
let http_response =
|
||||||
ProgressUpdateData::Engine {
|
make_request(request, engine, query, send_engine_progress_update).await?;
|
||||||
engine,
|
|
||||||
update: EngineProgressUpdate::Requesting,
|
|
||||||
},
|
|
||||||
start_time,
|
|
||||||
))?;
|
|
||||||
|
|
||||||
let mut res = request.send().await?;
|
|
||||||
|
|
||||||
progress_tx.send(ProgressUpdate::new(
|
|
||||||
ProgressUpdateData::Engine {
|
|
||||||
engine,
|
|
||||||
update: EngineProgressUpdate::Downloading,
|
|
||||||
},
|
|
||||||
start_time,
|
|
||||||
))?;
|
|
||||||
|
|
||||||
let mut body_bytes = Vec::new();
|
|
||||||
while let Some(chunk) = res.chunk().await? {
|
|
||||||
body_bytes.extend_from_slice(&chunk);
|
|
||||||
}
|
|
||||||
let body = String::from_utf8_lossy(&body_bytes).to_string();
|
|
||||||
|
|
||||||
progress_tx.send(ProgressUpdate::new(
|
|
||||||
ProgressUpdateData::Engine {
|
|
||||||
engine,
|
|
||||||
update: EngineProgressUpdate::Parsing,
|
|
||||||
},
|
|
||||||
start_time,
|
|
||||||
))?;
|
|
||||||
|
|
||||||
let http_response = HttpResponse {
|
|
||||||
res,
|
|
||||||
body,
|
|
||||||
config: query.config.clone(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let response = match engine.parse_response(&http_response) {
|
let response = match engine.parse_response(&http_response) {
|
||||||
Ok(response) => response,
|
Ok(response) => response,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
error!("parse error: {e}");
|
error!("parse error for {engine}: {e}");
|
||||||
EngineResponse::new()
|
EngineResponse::new()
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
progress_tx.send(ProgressUpdate::new(
|
send_engine_progress_update(engine, EngineProgressUpdate::Done);
|
||||||
ProgressUpdateData::Engine {
|
|
||||||
engine,
|
|
||||||
update: EngineProgressUpdate::Done,
|
|
||||||
},
|
|
||||||
start_time,
|
|
||||||
))?;
|
|
||||||
|
|
||||||
response
|
response
|
||||||
}
|
}
|
||||||
@ -347,12 +382,10 @@ pub async fn search(
|
|||||||
join_all(response_futures).await.into_iter().collect();
|
join_all(response_futures).await.into_iter().collect();
|
||||||
let responses = responses_result?;
|
let responses = responses_result?;
|
||||||
|
|
||||||
let response = merge_engine_responses(query.config.clone(), responses);
|
let response = ranking::merge_engine_responses(query.config.clone(), responses);
|
||||||
|
|
||||||
let has_infobox = response.infobox.is_some();
|
let has_infobox = response.infobox.is_some();
|
||||||
|
|
||||||
progress_tx.send(ProgressUpdate::new(
|
progress_tx.send(ProgressUpdate::new(
|
||||||
ProgressUpdateData::Response(response.clone()),
|
ProgressUpdateData::Response(ResponseForTab::All(response.clone())),
|
||||||
start_time,
|
start_time,
|
||||||
))?;
|
))?;
|
||||||
|
|
||||||
@ -420,6 +453,98 @@ pub async fn search(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn make_image_requests(
|
||||||
|
query: &SearchQuery,
|
||||||
|
progress_tx: &mpsc::UnboundedSender<ProgressUpdate>,
|
||||||
|
start_time: Instant,
|
||||||
|
send_engine_progress_update: &impl Fn(Engine, EngineProgressUpdate),
|
||||||
|
) -> eyre::Result<()> {
|
||||||
|
let mut requests = Vec::new();
|
||||||
|
for &engine in Engine::all() {
|
||||||
|
let engine_config = query.config.engines.get(engine);
|
||||||
|
if !engine_config.enabled {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
requests.push(async move {
|
||||||
|
let request_response = engine.request_images(query);
|
||||||
|
|
||||||
|
let response = match request_response {
|
||||||
|
RequestResponse::Http(request) => {
|
||||||
|
let http_response =
|
||||||
|
make_request(request, engine, query, send_engine_progress_update).await?;
|
||||||
|
|
||||||
|
let response = match engine.parse_images_response(&http_response) {
|
||||||
|
Ok(response) => response,
|
||||||
|
Err(e) => {
|
||||||
|
error!("parse error for {engine} (images): {e}");
|
||||||
|
EngineImagesResponse::new()
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
send_engine_progress_update(engine, EngineProgressUpdate::Done);
|
||||||
|
|
||||||
|
response
|
||||||
|
}
|
||||||
|
RequestResponse::Instant(_) => {
|
||||||
|
error!("unexpected instant response for image request");
|
||||||
|
EngineImagesResponse::new()
|
||||||
|
}
|
||||||
|
RequestResponse::None => EngineImagesResponse::new(),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok((engine, response))
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut response_futures = Vec::new();
|
||||||
|
for request in requests {
|
||||||
|
response_futures.push(request);
|
||||||
|
}
|
||||||
|
|
||||||
|
let responses_result: eyre::Result<HashMap<_, _>> =
|
||||||
|
join_all(response_futures).await.into_iter().collect();
|
||||||
|
let responses = responses_result?;
|
||||||
|
|
||||||
|
let response = ranking::merge_images_responses(query.config.clone(), responses);
|
||||||
|
progress_tx.send(ProgressUpdate::new(
|
||||||
|
ProgressUpdateData::Response(ResponseForTab::Images(response.clone())),
|
||||||
|
start_time,
|
||||||
|
))?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tracing::instrument(fields(query = %query.query), skip(progress_tx))]
|
||||||
|
pub async fn search(
|
||||||
|
query: &SearchQuery,
|
||||||
|
progress_tx: mpsc::UnboundedSender<ProgressUpdate>,
|
||||||
|
) -> eyre::Result<()> {
|
||||||
|
let start_time = Instant::now();
|
||||||
|
|
||||||
|
info!("Doing search");
|
||||||
|
|
||||||
|
let progress_tx = &progress_tx;
|
||||||
|
let send_engine_progress_update = |engine: Engine, update: EngineProgressUpdate| {
|
||||||
|
let _ = progress_tx.send(ProgressUpdate::new(
|
||||||
|
ProgressUpdateData::Engine { engine, update },
|
||||||
|
start_time,
|
||||||
|
));
|
||||||
|
};
|
||||||
|
|
||||||
|
match query.tab {
|
||||||
|
SearchTab::All => {
|
||||||
|
make_requests(query, progress_tx, start_time, &send_engine_progress_update).await?
|
||||||
|
}
|
||||||
|
SearchTab::Images => {
|
||||||
|
make_image_requests(query, progress_tx, start_time, &send_engine_progress_update)
|
||||||
|
.await?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn autocomplete(config: &Config, query: &str) -> eyre::Result<Vec<String>> {
|
pub async fn autocomplete(config: &Config, query: &str) -> eyre::Result<Vec<String>> {
|
||||||
let mut requests = Vec::new();
|
let mut requests = Vec::new();
|
||||||
for &engine in Engine::all() {
|
for &engine in Engine::all() {
|
||||||
@ -452,7 +577,10 @@ pub async fn autocomplete(config: &Config, query: &str) -> eyre::Result<Vec<Stri
|
|||||||
join_all(autocomplete_futures).await.into_iter().collect();
|
join_all(autocomplete_futures).await.into_iter().collect();
|
||||||
let autocomplete_results = autocomplete_results_result?;
|
let autocomplete_results = autocomplete_results_result?;
|
||||||
|
|
||||||
Ok(merge_autocomplete_responses(config, autocomplete_results))
|
Ok(ranking::merge_autocomplete_responses(
|
||||||
|
config,
|
||||||
|
autocomplete_results,
|
||||||
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub static CLIENT: Lazy<reqwest::Client> = Lazy::new(|| {
|
pub static CLIENT: Lazy<reqwest::Client> = Lazy::new(|| {
|
||||||
@ -466,13 +594,14 @@ pub static CLIENT: Lazy<reqwest::Client> = Lazy::new(|| {
|
|||||||
headers.insert("Accept-Language", "en-US,en;q=0.5".parse().unwrap());
|
headers.insert("Accept-Language", "en-US,en;q=0.5".parse().unwrap());
|
||||||
headers
|
headers
|
||||||
})
|
})
|
||||||
|
.timeout(Duration::from_secs(10))
|
||||||
.build()
|
.build()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
});
|
});
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Response {
|
pub struct Response {
|
||||||
pub search_results: Vec<SearchResult>,
|
pub search_results: Vec<SearchResult<EngineSearchResult>>,
|
||||||
pub featured_snippet: Option<FeaturedSnippet>,
|
pub featured_snippet: Option<FeaturedSnippet>,
|
||||||
pub answer: Option<Answer>,
|
pub answer: Option<Answer>,
|
||||||
pub infobox: Option<Infobox>,
|
pub infobox: Option<Infobox>,
|
||||||
@ -480,10 +609,20 @@ pub struct Response {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct SearchResult {
|
pub struct ImagesResponse {
|
||||||
pub url: String,
|
pub image_results: Vec<SearchResult<EngineImageResult>>,
|
||||||
pub title: String,
|
pub config: Arc<Config>,
|
||||||
pub description: String,
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub enum ResponseForTab {
|
||||||
|
All(Response),
|
||||||
|
Images(ImagesResponse),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct SearchResult<R> {
|
||||||
|
pub result: R,
|
||||||
pub engines: BTreeSet<Engine>,
|
pub engines: BTreeSet<Engine>,
|
||||||
pub score: f64,
|
pub score: f64,
|
||||||
}
|
}
|
||||||
@ -508,149 +647,7 @@ pub struct Infobox {
|
|||||||
pub engine: Engine,
|
pub engine: Engine,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn merge_engine_responses(
|
|
||||||
config: Arc<Config>,
|
|
||||||
responses: HashMap<Engine, EngineResponse>,
|
|
||||||
) -> Response {
|
|
||||||
let mut search_results: Vec<SearchResult> = Vec::new();
|
|
||||||
let mut featured_snippet: Option<FeaturedSnippet> = None;
|
|
||||||
let mut answer: Option<Answer> = None;
|
|
||||||
let mut infobox: Option<Infobox> = None;
|
|
||||||
|
|
||||||
for (engine, response) in responses {
|
|
||||||
let engine_config = config.engines.get(engine);
|
|
||||||
|
|
||||||
for (result_index, search_result) in response.search_results.into_iter().enumerate() {
|
|
||||||
// position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a
|
|
||||||
// score of 0.33, etc.
|
|
||||||
let base_result_score = 1. / (result_index + 1) as f64;
|
|
||||||
let result_score = base_result_score * engine_config.weight;
|
|
||||||
|
|
||||||
if let Some(existing_result) = search_results
|
|
||||||
.iter_mut()
|
|
||||||
.find(|r| r.url == search_result.url)
|
|
||||||
{
|
|
||||||
// if the weight of this engine is higher than every other one then replace the
|
|
||||||
// title and description
|
|
||||||
if engine_config.weight
|
|
||||||
> existing_result
|
|
||||||
.engines
|
|
||||||
.iter()
|
|
||||||
.map(|&other_engine| {
|
|
||||||
let other_engine_config = config.engines.get(other_engine);
|
|
||||||
other_engine_config.weight
|
|
||||||
})
|
|
||||||
.max_by(|a, b| a.partial_cmp(b).unwrap())
|
|
||||||
.unwrap_or(0.)
|
|
||||||
{
|
|
||||||
existing_result.title = search_result.title;
|
|
||||||
existing_result.description = search_result.description;
|
|
||||||
}
|
|
||||||
|
|
||||||
existing_result.engines.insert(engine);
|
|
||||||
existing_result.score += result_score;
|
|
||||||
} else {
|
|
||||||
search_results.push(SearchResult {
|
|
||||||
url: search_result.url,
|
|
||||||
title: search_result.title,
|
|
||||||
description: search_result.description,
|
|
||||||
engines: [engine].iter().copied().collect(),
|
|
||||||
score: result_score,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(engine_featured_snippet) = response.featured_snippet {
|
|
||||||
// if it has a higher weight than the current featured snippet
|
|
||||||
let featured_snippet_weight = featured_snippet.as_ref().map_or(0., |s| {
|
|
||||||
let other_engine_config = config.engines.get(s.engine);
|
|
||||||
other_engine_config.weight
|
|
||||||
});
|
|
||||||
if engine_config.weight > featured_snippet_weight {
|
|
||||||
featured_snippet = Some(FeaturedSnippet {
|
|
||||||
url: engine_featured_snippet.url,
|
|
||||||
title: engine_featured_snippet.title,
|
|
||||||
description: engine_featured_snippet.description,
|
|
||||||
engine,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(engine_answer_html) = response.answer_html {
|
|
||||||
// if it has a higher weight than the current answer
|
|
||||||
let answer_weight = answer.as_ref().map_or(0., |s| {
|
|
||||||
let other_engine_config = config.engines.get(s.engine);
|
|
||||||
other_engine_config.weight
|
|
||||||
});
|
|
||||||
if engine_config.weight > answer_weight {
|
|
||||||
answer = Some(Answer {
|
|
||||||
html: engine_answer_html,
|
|
||||||
engine,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(engine_infobox_html) = response.infobox_html {
|
|
||||||
// if it has a higher weight than the current infobox
|
|
||||||
let infobox_weight = infobox.as_ref().map_or(0., |s| {
|
|
||||||
let other_engine_config = config.engines.get(s.engine);
|
|
||||||
other_engine_config.weight
|
|
||||||
});
|
|
||||||
if engine_config.weight > infobox_weight {
|
|
||||||
infobox = Some(Infobox {
|
|
||||||
html: engine_infobox_html,
|
|
||||||
engine,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
search_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
|
|
||||||
|
|
||||||
Response {
|
|
||||||
search_results,
|
|
||||||
featured_snippet,
|
|
||||||
answer,
|
|
||||||
infobox,
|
|
||||||
config,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct AutocompleteResult {
|
pub struct AutocompleteResult {
|
||||||
pub query: String,
|
pub query: String,
|
||||||
pub score: f64,
|
pub score: f64,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn merge_autocomplete_responses(
|
|
||||||
config: &Config,
|
|
||||||
responses: HashMap<Engine, Vec<String>>,
|
|
||||||
) -> Vec<String> {
|
|
||||||
let mut autocomplete_results: Vec<AutocompleteResult> = Vec::new();
|
|
||||||
|
|
||||||
for (engine, response) in responses {
|
|
||||||
let engine_config = config.engines.get(engine);
|
|
||||||
|
|
||||||
for (result_index, autocomplete_result) in response.into_iter().enumerate() {
|
|
||||||
// position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a
|
|
||||||
// score of 0.33, etc.
|
|
||||||
let base_result_score = 1. / (result_index + 1) as f64;
|
|
||||||
let result_score = base_result_score * engine_config.weight;
|
|
||||||
|
|
||||||
if let Some(existing_result) = autocomplete_results
|
|
||||||
.iter_mut()
|
|
||||||
.find(|r| r.query == autocomplete_result)
|
|
||||||
{
|
|
||||||
existing_result.score += result_score;
|
|
||||||
} else {
|
|
||||||
autocomplete_results.push(AutocompleteResult {
|
|
||||||
query: autocomplete_result,
|
|
||||||
score: result_score,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
autocomplete_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
|
|
||||||
|
|
||||||
autocomplete_results.into_iter().map(|r| r.query).collect()
|
|
||||||
}
|
|
||||||
|
@ -5,8 +5,8 @@ use crate::engines::{HttpResponse, Response, CLIENT};
|
|||||||
|
|
||||||
pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
|
pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
|
||||||
for search_result in response.search_results.iter().take(8) {
|
for search_result in response.search_results.iter().take(8) {
|
||||||
if search_result.url.starts_with("https://docs.rs/") {
|
if search_result.result.url.starts_with("https://docs.rs/") {
|
||||||
return Some(CLIENT.get(search_result.url.as_str()));
|
return Some(CLIENT.get(search_result.result.url.as_str()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,8 +6,8 @@ use crate::engines::{answer::regex, Response, CLIENT};
|
|||||||
|
|
||||||
pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
|
pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
|
||||||
for search_result in response.search_results.iter().take(8) {
|
for search_result in response.search_results.iter().take(8) {
|
||||||
if regex!(r"^https:\/\/github\.com\/[\w-]+\/[\w.-]+$").is_match(&search_result.url) {
|
if regex!(r"^https:\/\/github\.com\/[\w-]+\/[\w.-]+$").is_match(&search_result.result.url) {
|
||||||
return Some(CLIENT.get(search_result.url.as_str()));
|
return Some(CLIENT.get(search_result.result.url.as_str()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -13,10 +13,11 @@ pub struct MdnConfig {
|
|||||||
pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
|
pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
|
||||||
for search_result in response.search_results.iter().take(8) {
|
for search_result in response.search_results.iter().take(8) {
|
||||||
if search_result
|
if search_result
|
||||||
|
.result
|
||||||
.url
|
.url
|
||||||
.starts_with("https://developer.mozilla.org/en-US/docs/Web")
|
.starts_with("https://developer.mozilla.org/en-US/docs/Web")
|
||||||
{
|
{
|
||||||
return Some(CLIENT.get(search_result.url.as_str()));
|
return Some(CLIENT.get(search_result.result.url.as_str()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,8 +5,12 @@ use crate::engines::{HttpResponse, Response, CLIENT};
|
|||||||
|
|
||||||
pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
|
pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
|
||||||
for search_result in response.search_results.iter().take(8) {
|
for search_result in response.search_results.iter().take(8) {
|
||||||
if search_result.url.starts_with("https://minecraft.wiki/w/") {
|
if search_result
|
||||||
return Some(CLIENT.get(search_result.url.as_str()));
|
.result
|
||||||
|
.url
|
||||||
|
.starts_with("https://minecraft.wiki/w/")
|
||||||
|
{
|
||||||
|
return Some(CLIENT.get(search_result.result.url.as_str()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7,9 +7,9 @@ use crate::engines::{answer::regex, Response, CLIENT};
|
|||||||
pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
|
pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
|
||||||
for search_result in response.search_results.iter().take(8) {
|
for search_result in response.search_results.iter().take(8) {
|
||||||
if regex!(r"^https:\/\/(stackoverflow\.com|serverfault\.com|superuser\.com|\w{1,}\.stackexchange\.com)\/questions\/\d+")
|
if regex!(r"^https:\/\/(stackoverflow\.com|serverfault\.com|superuser\.com|\w{1,}\.stackexchange\.com)\/questions\/\d+")
|
||||||
.is_match(&search_result.url)
|
.is_match(&search_result.result.url)
|
||||||
{
|
{
|
||||||
return Some(CLIENT.get(search_result.url.as_str()));
|
return Some(CLIENT.get(search_result.result.url.as_str()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
204
src/engines/ranking.rs
Normal file
204
src/engines/ranking.rs
Normal file
@ -0,0 +1,204 @@
|
|||||||
|
use std::{collections::HashMap, sync::Arc};
|
||||||
|
|
||||||
|
use crate::config::Config;
|
||||||
|
|
||||||
|
use super::{
|
||||||
|
Answer, AutocompleteResult, Engine, EngineImageResult, EngineImagesResponse, EngineResponse,
|
||||||
|
EngineSearchResult, FeaturedSnippet, ImagesResponse, Infobox, Response, SearchResult,
|
||||||
|
};
|
||||||
|
|
||||||
|
pub fn merge_engine_responses(
|
||||||
|
config: Arc<Config>,
|
||||||
|
responses: HashMap<Engine, EngineResponse>,
|
||||||
|
) -> Response {
|
||||||
|
let mut search_results: Vec<SearchResult<EngineSearchResult>> = Vec::new();
|
||||||
|
let mut featured_snippet: Option<FeaturedSnippet> = None;
|
||||||
|
let mut answer: Option<Answer> = None;
|
||||||
|
let mut infobox: Option<Infobox> = None;
|
||||||
|
|
||||||
|
for (engine, response) in responses {
|
||||||
|
let engine_config = config.engines.get(engine);
|
||||||
|
|
||||||
|
for (result_index, search_result) in response.search_results.into_iter().enumerate() {
|
||||||
|
// position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a
|
||||||
|
// score of 0.33, etc.
|
||||||
|
let base_result_score = 1. / (result_index + 1) as f64;
|
||||||
|
let result_score = base_result_score * engine_config.weight;
|
||||||
|
|
||||||
|
if let Some(existing_result) = search_results
|
||||||
|
.iter_mut()
|
||||||
|
.find(|r| r.result.url == search_result.url)
|
||||||
|
{
|
||||||
|
// if the weight of this engine is higher than every other one then replace the
|
||||||
|
// title and description
|
||||||
|
if engine_config.weight
|
||||||
|
> existing_result
|
||||||
|
.engines
|
||||||
|
.iter()
|
||||||
|
.map(|&other_engine| {
|
||||||
|
let other_engine_config = config.engines.get(other_engine);
|
||||||
|
other_engine_config.weight
|
||||||
|
})
|
||||||
|
.max_by(|a, b| a.partial_cmp(b).unwrap())
|
||||||
|
.unwrap_or(0.)
|
||||||
|
{
|
||||||
|
existing_result.result.title = search_result.title;
|
||||||
|
existing_result.result.description = search_result.description;
|
||||||
|
}
|
||||||
|
|
||||||
|
existing_result.engines.insert(engine);
|
||||||
|
existing_result.score += result_score;
|
||||||
|
} else {
|
||||||
|
search_results.push(SearchResult {
|
||||||
|
result: search_result,
|
||||||
|
engines: [engine].iter().copied().collect(),
|
||||||
|
score: result_score,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(engine_featured_snippet) = response.featured_snippet {
|
||||||
|
// if it has a higher weight than the current featured snippet
|
||||||
|
let featured_snippet_weight = featured_snippet.as_ref().map_or(0., |s| {
|
||||||
|
let other_engine_config = config.engines.get(s.engine);
|
||||||
|
other_engine_config.weight
|
||||||
|
});
|
||||||
|
if engine_config.weight > featured_snippet_weight {
|
||||||
|
featured_snippet = Some(FeaturedSnippet {
|
||||||
|
url: engine_featured_snippet.url,
|
||||||
|
title: engine_featured_snippet.title,
|
||||||
|
description: engine_featured_snippet.description,
|
||||||
|
engine,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(engine_answer_html) = response.answer_html {
|
||||||
|
// if it has a higher weight than the current answer
|
||||||
|
let answer_weight = answer.as_ref().map_or(0., |s| {
|
||||||
|
let other_engine_config = config.engines.get(s.engine);
|
||||||
|
other_engine_config.weight
|
||||||
|
});
|
||||||
|
if engine_config.weight > answer_weight {
|
||||||
|
answer = Some(Answer {
|
||||||
|
html: engine_answer_html,
|
||||||
|
engine,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(engine_infobox_html) = response.infobox_html {
|
||||||
|
// if it has a higher weight than the current infobox
|
||||||
|
let infobox_weight = infobox.as_ref().map_or(0., |s| {
|
||||||
|
let other_engine_config = config.engines.get(s.engine);
|
||||||
|
other_engine_config.weight
|
||||||
|
});
|
||||||
|
if engine_config.weight > infobox_weight {
|
||||||
|
infobox = Some(Infobox {
|
||||||
|
html: engine_infobox_html,
|
||||||
|
engine,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
search_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
|
||||||
|
|
||||||
|
Response {
|
||||||
|
search_results,
|
||||||
|
featured_snippet,
|
||||||
|
answer,
|
||||||
|
infobox,
|
||||||
|
config,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn merge_autocomplete_responses(
|
||||||
|
config: &Config,
|
||||||
|
responses: HashMap<Engine, Vec<String>>,
|
||||||
|
) -> Vec<String> {
|
||||||
|
let mut autocomplete_results: Vec<AutocompleteResult> = Vec::new();
|
||||||
|
|
||||||
|
for (engine, response) in responses {
|
||||||
|
let engine_config = config.engines.get(engine);
|
||||||
|
|
||||||
|
for (result_index, autocomplete_result) in response.into_iter().enumerate() {
|
||||||
|
// position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a
|
||||||
|
// score of 0.33, etc.
|
||||||
|
let base_result_score = 1. / (result_index + 1) as f64;
|
||||||
|
let result_score = base_result_score * engine_config.weight;
|
||||||
|
|
||||||
|
if let Some(existing_result) = autocomplete_results
|
||||||
|
.iter_mut()
|
||||||
|
.find(|r| r.query == autocomplete_result)
|
||||||
|
{
|
||||||
|
existing_result.score += result_score;
|
||||||
|
} else {
|
||||||
|
autocomplete_results.push(AutocompleteResult {
|
||||||
|
query: autocomplete_result,
|
||||||
|
score: result_score,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
autocomplete_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
|
||||||
|
|
||||||
|
autocomplete_results.into_iter().map(|r| r.query).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn merge_images_responses(
|
||||||
|
config: Arc<Config>,
|
||||||
|
responses: HashMap<Engine, EngineImagesResponse>,
|
||||||
|
) -> ImagesResponse {
|
||||||
|
let mut image_results: Vec<SearchResult<EngineImageResult>> = Vec::new();
|
||||||
|
|
||||||
|
for (engine, response) in responses {
|
||||||
|
let engine_config = config.engines.get(engine);
|
||||||
|
|
||||||
|
for (result_index, image_result) in response.image_results.into_iter().enumerate() {
|
||||||
|
// position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a
|
||||||
|
// score of 0.33, etc.
|
||||||
|
let base_result_score = 1. / (result_index + 1) as f64;
|
||||||
|
let result_score = base_result_score * engine_config.weight;
|
||||||
|
|
||||||
|
if let Some(existing_result) = image_results
|
||||||
|
.iter_mut()
|
||||||
|
.find(|r| r.result.image_url == image_result.image_url)
|
||||||
|
{
|
||||||
|
// if the weight of this engine is higher than every other one then replace the
|
||||||
|
// title and page url
|
||||||
|
if engine_config.weight
|
||||||
|
> existing_result
|
||||||
|
.engines
|
||||||
|
.iter()
|
||||||
|
.map(|&other_engine| {
|
||||||
|
let other_engine_config = config.engines.get(other_engine);
|
||||||
|
other_engine_config.weight
|
||||||
|
})
|
||||||
|
.max_by(|a, b| a.partial_cmp(b).unwrap())
|
||||||
|
.unwrap_or(0.)
|
||||||
|
{
|
||||||
|
existing_result.result.title = image_result.title;
|
||||||
|
existing_result.result.page_url = image_result.page_url;
|
||||||
|
}
|
||||||
|
|
||||||
|
existing_result.engines.insert(engine);
|
||||||
|
existing_result.score += result_score;
|
||||||
|
} else {
|
||||||
|
image_results.push(SearchResult {
|
||||||
|
result: image_result,
|
||||||
|
engines: [engine].iter().copied().collect(),
|
||||||
|
score: result_score,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
image_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
|
||||||
|
|
||||||
|
ImagesResponse {
|
||||||
|
image_results,
|
||||||
|
config,
|
||||||
|
}
|
||||||
|
}
|
@ -1,9 +1,11 @@
|
|||||||
use base64::Engine;
|
use base64::Engine;
|
||||||
use scraper::{ElementRef, Selector};
|
use eyre::eyre;
|
||||||
|
use scraper::{ElementRef, Html, Selector};
|
||||||
|
use tracing::warn;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
engines::{EngineResponse, CLIENT},
|
engines::{EngineImageResult, EngineImagesResponse, EngineResponse, CLIENT},
|
||||||
parse::{parse_html_response_with_opts, ParseOpts, QueryMethod},
|
parse::{parse_html_response_with_opts, ParseOpts, QueryMethod},
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -64,6 +66,89 @@ pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn request_images(query: &str) -> reqwest::RequestBuilder {
|
||||||
|
CLIENT.get(
|
||||||
|
Url::parse_with_params(
|
||||||
|
"https://www.bing.com/images/async",
|
||||||
|
&[
|
||||||
|
("q", query),
|
||||||
|
("async", "content"),
|
||||||
|
("first", "1"),
|
||||||
|
("count", "35"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
.unwrap(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tracing::instrument]
|
||||||
|
pub fn parse_images_response(body: &str) -> eyre::Result<EngineImagesResponse> {
|
||||||
|
let dom = Html::parse_document(body);
|
||||||
|
|
||||||
|
let mut image_results = Vec::new();
|
||||||
|
|
||||||
|
let image_container_el_sel = Selector::parse(".imgpt").unwrap();
|
||||||
|
let image_el_sel = Selector::parse(".iusc").unwrap();
|
||||||
|
for image_container_el in dom.select(&image_container_el_sel) {
|
||||||
|
let image_el = image_container_el
|
||||||
|
.select(&image_el_sel)
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| eyre!("no image element found"))?;
|
||||||
|
|
||||||
|
// parse the "m" attribute as json
|
||||||
|
let Some(data) = image_el.value().attr("m") else {
|
||||||
|
// this is normal, i think
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
let data = serde_json::from_str::<serde_json::Value>(data)?;
|
||||||
|
let page_url = data
|
||||||
|
.get("purl")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or_default();
|
||||||
|
let image_url = data
|
||||||
|
// short for media url, probably
|
||||||
|
.get("murl")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or_default();
|
||||||
|
let page_title = data
|
||||||
|
.get("t")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or_default()
|
||||||
|
// bing adds these unicode characters around matches
|
||||||
|
.replace('', "")
|
||||||
|
.replace('', "");
|
||||||
|
|
||||||
|
// the text looks like "1200 x 1600 · jpegWikipedia"
|
||||||
|
// (the last part is incorrectly parsed since the actual text is inside another
|
||||||
|
// element but this is already good enough for our purposes)
|
||||||
|
let text = image_container_el.text().collect::<String>();
|
||||||
|
let width_height: Vec<u64> = text
|
||||||
|
.split(" · ")
|
||||||
|
.next()
|
||||||
|
.unwrap_or_default()
|
||||||
|
.split(" x ")
|
||||||
|
.map(|s| s.parse().unwrap_or_default())
|
||||||
|
.collect();
|
||||||
|
let (width, height) = match width_height.as_slice() {
|
||||||
|
[width, height] => (*width, *height),
|
||||||
|
_ => {
|
||||||
|
warn!("couldn't get width and height from text \"{text}\"");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
image_results.push(EngineImageResult {
|
||||||
|
page_url: page_url.to_string(),
|
||||||
|
image_url: image_url.to_string(),
|
||||||
|
title: page_title.to_string(),
|
||||||
|
width,
|
||||||
|
height,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(EngineImagesResponse { image_results })
|
||||||
|
}
|
||||||
|
|
||||||
fn clean_url(url: &str) -> eyre::Result<String> {
|
fn clean_url(url: &str) -> eyre::Result<String> {
|
||||||
// clean up bing's tracking urls
|
// clean up bing's tracking urls
|
||||||
if url.starts_with("https://www.bing.com/ck/a?") {
|
if url.starts_with("https://www.bing.com/ck/a?") {
|
||||||
|
@ -1,8 +1,10 @@
|
|||||||
|
use eyre::eyre;
|
||||||
use scraper::{ElementRef, Selector};
|
use scraper::{ElementRef, Selector};
|
||||||
|
use tracing::warn;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
engines::{EngineResponse, CLIENT},
|
engines::{EngineImageResult, EngineImagesResponse, EngineResponse, CLIENT},
|
||||||
parse::{parse_html_response_with_opts, ParseOpts, QueryMethod},
|
parse::{parse_html_response_with_opts, ParseOpts, QueryMethod},
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -10,8 +12,11 @@ pub fn request(query: &str) -> reqwest::RequestBuilder {
|
|||||||
CLIENT.get(
|
CLIENT.get(
|
||||||
Url::parse_with_params(
|
Url::parse_with_params(
|
||||||
"https://www.google.com/search",
|
"https://www.google.com/search",
|
||||||
|
&[
|
||||||
|
("q", query),
|
||||||
// nfpr makes it not try to autocorrect
|
// nfpr makes it not try to autocorrect
|
||||||
&[("q", query), ("nfpr", "1")],
|
("nfpr", "1"),
|
||||||
|
],
|
||||||
)
|
)
|
||||||
.unwrap(),
|
.unwrap(),
|
||||||
)
|
)
|
||||||
@ -112,6 +117,92 @@ pub fn parse_autocomplete_response(body: &str) -> eyre::Result<Vec<String>> {
|
|||||||
.collect())
|
.collect())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn request_images(query: &str) -> reqwest::RequestBuilder {
|
||||||
|
// ok so google also has a json api for images BUT it gives us less results
|
||||||
|
CLIENT.get(
|
||||||
|
Url::parse_with_params(
|
||||||
|
"https://www.google.com/search",
|
||||||
|
&[("q", query), ("udm", "2"), ("prmd", "ivsnmbtz")],
|
||||||
|
)
|
||||||
|
.unwrap(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse_images_response(body: &str) -> eyre::Result<EngineImagesResponse> {
|
||||||
|
// we can't just scrape the html because it won't give us the image sources,
|
||||||
|
// so... we have to scrape their internal json
|
||||||
|
|
||||||
|
// iterate through every script until we find something that matches our regex
|
||||||
|
let internal_json_regex =
|
||||||
|
regex::Regex::new(r#"(?:\(function\(\)\{google\.jl=\{.+?)var \w=(\{".+?\});"#)?;
|
||||||
|
let mut internal_json = None;
|
||||||
|
let dom = scraper::Html::parse_document(body);
|
||||||
|
for script in dom.select(&Selector::parse("script").unwrap()) {
|
||||||
|
let script = script.inner_html();
|
||||||
|
if let Some(captures) = internal_json_regex.captures(&script).and_then(|c| c.get(1)) {
|
||||||
|
internal_json = Some(captures.as_str().to_string());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let internal_json =
|
||||||
|
internal_json.ok_or_else(|| eyre!("couldn't get internal json for google images"))?;
|
||||||
|
let internal_json: serde_json::Map<String, serde_json::Value> =
|
||||||
|
serde_json::from_str(&internal_json)?;
|
||||||
|
|
||||||
|
let mut image_results = Vec::new();
|
||||||
|
for element_json in internal_json.values() {
|
||||||
|
// the internal json uses arrays instead of maps, which makes it kinda hard to
|
||||||
|
// use and also probably pretty unstable
|
||||||
|
|
||||||
|
let Some(element_json) = element_json
|
||||||
|
.as_array()
|
||||||
|
.and_then(|a| a.get(1))
|
||||||
|
.and_then(|v| v.as_array())
|
||||||
|
else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
let Some((image_url, width, height)) = element_json
|
||||||
|
.get(3)
|
||||||
|
.and_then(|v| serde_json::from_value(v.clone()).ok())
|
||||||
|
else {
|
||||||
|
warn!("couldn't get image data from google images json");
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
// this is probably pretty brittle, hopefully google doesn't break it any time
|
||||||
|
// soon
|
||||||
|
let Some(page) = element_json
|
||||||
|
.get(9)
|
||||||
|
.and_then(|v| v.as_object())
|
||||||
|
.and_then(|o| o.get("2003"))
|
||||||
|
.and_then(|v| v.as_array())
|
||||||
|
else {
|
||||||
|
warn!("couldn't get page data from google images json");
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
let Some(page_url) = page.get(2).and_then(|v| v.as_str()).map(|s| s.to_string()) else {
|
||||||
|
warn!("couldn't get page url from google images json");
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
let Some(title) = page.get(3).and_then(|v| v.as_str()).map(|s| s.to_string()) else {
|
||||||
|
warn!("couldn't get page title from google images json");
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
image_results.push(EngineImageResult {
|
||||||
|
image_url,
|
||||||
|
page_url,
|
||||||
|
title,
|
||||||
|
width,
|
||||||
|
height,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(EngineImagesResponse { image_results })
|
||||||
|
}
|
||||||
|
|
||||||
fn clean_url(url: &str) -> eyre::Result<String> {
|
fn clean_url(url: &str) -> eyre::Result<String> {
|
||||||
if url.starts_with("/url?q=") {
|
if url.starts_with("/url?q=") {
|
||||||
// get the q param
|
// get the q param
|
||||||
|
@ -58,6 +58,13 @@ main {
|
|||||||
background-color: var(--bg-2);
|
background-color: var(--bg-2);
|
||||||
min-height: 100%;
|
min-height: 100%;
|
||||||
}
|
}
|
||||||
|
/* image search uses 100% width: lift the width caps on both the main
   element and the results container */
.search-images > main {
  max-width: 100%;
}

.results-container.search-images {
  max-width: none;
}
|
||||||
@media screen and (max-width: 74rem) {
|
@media screen and (max-width: 74rem) {
|
||||||
/* small screens */
|
/* small screens */
|
||||||
.results-container {
|
.results-container {
|
||||||
@ -145,6 +152,21 @@ h1 {
|
|||||||
background: var(--bg-4);
|
background: var(--bg-4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* search tabs (like images, if enabled) */

/* horizontal row of tabs, pulled up by the negative top margin */
.search-tabs {
  display: flex;
  gap: 0.5rem;
  margin-top: -0.5rem;
  margin-bottom: 0.5rem;
}

/* a single tab, whether selected or a link */
.search-tab {
  padding: 0.25rem;
  border: 1px solid var(--bg-4);
}

/* tabs rendered as anchors use the link colour */
a.search-tab {
  color: var(--link);
}
|
||||||
|
|
||||||
/* search result */
|
/* search result */
|
||||||
.search-result {
|
.search-result {
|
||||||
padding-top: 1rem;
|
padding-top: 1rem;
|
||||||
@ -298,7 +320,7 @@ h3.answer-thesaurus-category-title {
|
|||||||
text-decoration: underline;
|
text-decoration: underline;
|
||||||
}
|
}
|
||||||
.answer-notepad {
|
.answer-notepad {
|
||||||
width: calc( 100% - 4px );
|
width: calc(100% - 4px);
|
||||||
height: fit-content;
|
height: fit-content;
|
||||||
overflow-y: show;
|
overflow-y: show;
|
||||||
background-color: transparent;
|
background-color: transparent;
|
||||||
@ -373,9 +395,56 @@ h3.answer-thesaurus-category-title {
|
|||||||
.infobox-minecraft_wiki-article > .notaninfobox {
|
.infobox-minecraft_wiki-article > .notaninfobox {
|
||||||
display: none !important;
|
display: none !important;
|
||||||
}
|
}
|
||||||
.noexcerpt, .navigation-not-searchable {
|
.noexcerpt,
|
||||||
|
.navigation-not-searchable {
|
||||||
display: none !important;
|
display: none !important;
|
||||||
}
|
}
|
||||||
.mcw-mainpage-icon {
|
.mcw-mainpage-icon {
|
||||||
display: inline-block;
|
display: inline-block;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* image results */

/* wrapping row of image cards */
.image-results {
  display: flex;
  flex-wrap: wrap;
  gap: 0.5rem;
}

.image-result {
  position: relative;
  min-width: 12rem;
}

/* centre the image inside its card */
.image-result-img-container {
  width: fit-content;
  margin: 0 auto;
}

/* fixed height, width follows the aspect ratio */
.image-result img {
  width: auto;
  height: 10.3rem;
}

.image-result-page-anchor {
  display: block;
  height: 2em;
}

/* the page url and the title are both absolutely positioned single-line
   labels that are cut off with an ellipsis */
.image-result-page-url,
.image-result-title {
  display: block;
  position: absolute;
  width: 100%;
  overflow: hidden;
  text-overflow: ellipsis;
  white-space: nowrap;
}

.image-result-page-url {
  font-size: 0.8rem;
}

/* the title is offset by one line via its top margin */
.image-result-title {
  font-size: 0.85rem;
  margin-top: 1em;
}
|
||||||
|
73
src/web/image_proxy.rs
Normal file
73
src/web/image_proxy.rs
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
use std::{collections::HashMap, sync::Arc};
|
||||||
|
|
||||||
|
use axum::{
|
||||||
|
extract::{Query, State},
|
||||||
|
http::StatusCode,
|
||||||
|
response::{IntoResponse, Response},
|
||||||
|
};
|
||||||
|
use tracing::error;
|
||||||
|
|
||||||
|
use crate::{config::Config, engines};
|
||||||
|
|
||||||
|
pub async fn route(
|
||||||
|
Query(params): Query<HashMap<String, String>>,
|
||||||
|
State(config): State<Arc<Config>>,
|
||||||
|
) -> Response {
|
||||||
|
let proxy_config = &config.image_search.proxy;
|
||||||
|
if !proxy_config.enabled.unwrap() {
|
||||||
|
return (StatusCode::FORBIDDEN, "Image proxy is disabled").into_response();
|
||||||
|
};
|
||||||
|
let url = params.get("url").cloned().unwrap_or_default();
|
||||||
|
if url.is_empty() {
|
||||||
|
return (StatusCode::BAD_REQUEST, "Missing `url` parameter").into_response();
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut res = match engines::CLIENT
|
||||||
|
.get(&url)
|
||||||
|
.header("accept", "image/*")
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(res) => res,
|
||||||
|
Err(err) => {
|
||||||
|
error!("Image proxy error for {url}: {err}");
|
||||||
|
return (StatusCode::INTERNAL_SERVER_ERROR, "Image proxy error").into_response();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let max_size = proxy_config.max_download_size.unwrap();
|
||||||
|
|
||||||
|
if res.content_length().unwrap_or_default() > max_size {
|
||||||
|
return (StatusCode::PAYLOAD_TOO_LARGE, "Image too large").into_response();
|
||||||
|
}
|
||||||
|
// validate content-type
|
||||||
|
let content_type = res
|
||||||
|
.headers()
|
||||||
|
.get(reqwest::header::CONTENT_TYPE)
|
||||||
|
.and_then(|v| v.to_str().ok())
|
||||||
|
.unwrap_or_default()
|
||||||
|
.to_string();
|
||||||
|
if !content_type.starts_with("image/") {
|
||||||
|
return (StatusCode::BAD_REQUEST, "Not an image").into_response();
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut image_bytes = Vec::new();
|
||||||
|
while let Ok(Some(chunk)) = res.chunk().await {
|
||||||
|
image_bytes.extend_from_slice(&chunk);
|
||||||
|
if image_bytes.len() as u64 > max_size {
|
||||||
|
return (StatusCode::PAYLOAD_TOO_LARGE, "Image too large").into_response();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(
|
||||||
|
[
|
||||||
|
(axum::http::header::CONTENT_TYPE, content_type),
|
||||||
|
(
|
||||||
|
axum::http::header::CACHE_CONTROL,
|
||||||
|
"public, max-age=31536000".to_owned(),
|
||||||
|
),
|
||||||
|
],
|
||||||
|
image_bytes,
|
||||||
|
)
|
||||||
|
.into_response()
|
||||||
|
}
|
@ -1,4 +1,5 @@
|
|||||||
pub mod autocomplete;
|
pub mod autocomplete;
|
||||||
|
mod image_proxy;
|
||||||
pub mod index;
|
pub mod index;
|
||||||
pub mod opensearch;
|
pub mod opensearch;
|
||||||
pub mod search;
|
pub mod search;
|
||||||
@ -45,6 +46,7 @@ pub async fn run(config: Config) {
|
|||||||
.route("/opensearch.xml", get(opensearch::route))
|
.route("/opensearch.xml", get(opensearch::route))
|
||||||
.route("/search", get(search::route))
|
.route("/search", get(search::route))
|
||||||
.route("/autocomplete", get(autocomplete::route))
|
.route("/autocomplete", get(autocomplete::route))
|
||||||
|
.route("/image-proxy", get(image_proxy::route))
|
||||||
.with_state(Arc::new(config));
|
.with_state(Arc::new(config));
|
||||||
|
|
||||||
info!("Listening on http://{bind_addr}");
|
info!("Listening on http://{bind_addr}");
|
||||||
|
@ -1,4 +1,7 @@
|
|||||||
use std::{collections::HashMap, net::SocketAddr, sync::Arc};
|
mod all;
|
||||||
|
mod images;
|
||||||
|
|
||||||
|
use std::{collections::HashMap, net::SocketAddr, str::FromStr, sync::Arc};
|
||||||
|
|
||||||
use async_stream::stream;
|
use async_stream::stream;
|
||||||
use axum::{
|
use axum::{
|
||||||
@ -8,142 +11,68 @@ use axum::{
|
|||||||
response::IntoResponse,
|
response::IntoResponse,
|
||||||
};
|
};
|
||||||
use bytes::Bytes;
|
use bytes::Bytes;
|
||||||
use maud::{html, PreEscaped};
|
use maud::{html, PreEscaped, DOCTYPE};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
config::Config,
|
config::Config,
|
||||||
engines::{self, Engine, EngineProgressUpdate, ProgressUpdateData, Response, SearchQuery},
|
engines::{
|
||||||
|
self, Engine, EngineProgressUpdate, ProgressUpdateData, ResponseForTab, SearchQuery,
|
||||||
|
SearchTab,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
fn render_beginning_of_html(query: &str) -> String {
|
fn render_beginning_of_html(search: &SearchQuery) -> String {
|
||||||
let head_html = html! {
|
let head_html = html! {
|
||||||
head {
|
head {
|
||||||
meta charset="UTF-8";
|
meta charset="UTF-8";
|
||||||
meta name="viewport" content="width=device-width, initial-scale=1.0";
|
meta name="viewport" content="width=device-width, initial-scale=1.0";
|
||||||
title {
|
title {
|
||||||
(query)
|
(search.query)
|
||||||
" - metasearch"
|
" - metasearch"
|
||||||
}
|
}
|
||||||
link rel="stylesheet" href="/style.css";
|
link rel="stylesheet" href="/style.css";
|
||||||
script src="/script.js" defer {}
|
script src="/script.js" defer {}
|
||||||
link rel="search" type="application/opensearchdescription+xml" title="metasearch" href="/opensearch.xml";
|
link rel="search" type="application/opensearchdescription+xml" title="metasearch" href="/opensearch.xml";
|
||||||
}
|
}
|
||||||
}.into_string();
|
};
|
||||||
let form_html = html! {
|
let form_html = html! {
|
||||||
form."search-form" action="/search" method="get" {
|
form."search-form" action="/search" method="get" {
|
||||||
input #"search-input" type="text" name="q" placeholder="Search" value=(query) autofocus onfocus="this.select()" autocomplete="off";
|
input #"search-input" type="text" name="q" placeholder="Search" value=(search.query) autofocus onfocus="this.select()" autocomplete="off";
|
||||||
input type="submit" value="Search";
|
input type="submit" value="Search";
|
||||||
}
|
}
|
||||||
}.into_string();
|
@if search.config.image_search.enabled.unwrap() {
|
||||||
|
div.search-tabs {
|
||||||
|
@if search.tab == SearchTab::All { span.search-tab.selected { "All" } }
|
||||||
|
@else { a.search-tab href={ "?q=" (search.query) } { "All" } }
|
||||||
|
@if search.tab == SearchTab::Images { span.search-tab.selected { "Images" } }
|
||||||
|
@else { a.search-tab href={ "?q=" (search.query) "&tab=images" } { "Images" } }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
format!(
|
// we don't close the elements here because we do chunked responses
|
||||||
r#"<!DOCTYPE html>
|
html! {
|
||||||
<html lang="en">
|
(DOCTYPE)
|
||||||
{head_html}
|
html lang="en";
|
||||||
<body>
|
(head_html)
|
||||||
<div class="results-container">
|
body;
|
||||||
<main>
|
div.results-container.{"search-" (search.tab.to_string())};
|
||||||
{form_html}
|
main;
|
||||||
<div class="progress-updates">
|
(form_html)
|
||||||
"#
|
div.progress-updates;
|
||||||
)
|
}
|
||||||
|
.into_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn render_end_of_html() -> String {
|
fn render_end_of_html() -> String {
|
||||||
r"</main></div></body></html>".to_string()
|
r"</main></div></body></html>".to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn render_engine_list(engines: &[engines::Engine], config: &Config) -> PreEscaped<String> {
|
fn render_results_for_tab(response: ResponseForTab) -> PreEscaped<String> {
|
||||||
let mut html = String::new();
|
match response {
|
||||||
for (i, engine) in engines.iter().enumerate() {
|
ResponseForTab::All(r) => all::render_results(r),
|
||||||
if config.ui.show_engine_list_separator.unwrap() && i > 0 {
|
ResponseForTab::Images(r) => images::render_results(r),
|
||||||
html.push_str(" · ");
|
|
||||||
}
|
}
|
||||||
let raw_engine_id = &engine.id();
|
|
||||||
let engine_id = if config.ui.show_engine_list_separator.unwrap() {
|
|
||||||
raw_engine_id.replace('_', " ")
|
|
||||||
} else {
|
|
||||||
raw_engine_id.to_string()
|
|
||||||
};
|
|
||||||
html.push_str(&html! { span."engine-list-item" { (engine_id) } }.into_string())
|
|
||||||
}
|
|
||||||
html! {
|
|
||||||
div."engine-list" {
|
|
||||||
(PreEscaped(html))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn render_search_result(result: &engines::SearchResult, config: &Config) -> PreEscaped<String> {
|
|
||||||
html! {
|
|
||||||
div."search-result" {
|
|
||||||
a."search-result-anchor" rel="noreferrer" href=(result.url) {
|
|
||||||
span."search-result-url" { (result.url) }
|
|
||||||
h3."search-result-title" { (result.title) }
|
|
||||||
}
|
|
||||||
p."search-result-description" { (result.description) }
|
|
||||||
(render_engine_list(&result.engines.iter().copied().collect::<Vec<_>>(), config))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn render_featured_snippet(
|
|
||||||
featured_snippet: &engines::FeaturedSnippet,
|
|
||||||
config: &Config,
|
|
||||||
) -> PreEscaped<String> {
|
|
||||||
html! {
|
|
||||||
div."featured-snippet" {
|
|
||||||
p."search-result-description" { (featured_snippet.description) }
|
|
||||||
a."search-result-anchor" rel="noreferrer" href=(featured_snippet.url) {
|
|
||||||
span."search-result-url" { (featured_snippet.url) }
|
|
||||||
h3."search-result-title" { (featured_snippet.title) }
|
|
||||||
}
|
|
||||||
(render_engine_list(&[featured_snippet.engine], config))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn render_results(response: Response) -> PreEscaped<String> {
|
|
||||||
let mut html = String::new();
|
|
||||||
if let Some(infobox) = &response.infobox {
|
|
||||||
html.push_str(
|
|
||||||
&html! {
|
|
||||||
div."infobox" {
|
|
||||||
(infobox.html)
|
|
||||||
(render_engine_list(&[infobox.engine], &response.config))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
.into_string(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if let Some(answer) = &response.answer {
|
|
||||||
html.push_str(
|
|
||||||
&html! {
|
|
||||||
div."answer" {
|
|
||||||
(answer.html)
|
|
||||||
(render_engine_list(&[answer.engine], &response.config))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
.into_string(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if let Some(featured_snippet) = &response.featured_snippet {
|
|
||||||
html.push_str(&render_featured_snippet(featured_snippet, &response.config).into_string());
|
|
||||||
}
|
|
||||||
for result in &response.search_results {
|
|
||||||
html.push_str(&render_search_result(result, &response.config).into_string());
|
|
||||||
}
|
|
||||||
|
|
||||||
if html.is_empty() {
|
|
||||||
html.push_str(
|
|
||||||
&html! {
|
|
||||||
p { "No results." }
|
|
||||||
}
|
|
||||||
.into_string(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
PreEscaped(html)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn render_engine_progress_update(
|
fn render_engine_progress_update(
|
||||||
@ -173,6 +102,27 @@ fn render_engine_progress_update(
|
|||||||
.into_string()
|
.into_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn render_engine_list(engines: &[engines::Engine], config: &Config) -> PreEscaped<String> {
|
||||||
|
let mut html = String::new();
|
||||||
|
for (i, engine) in engines.iter().enumerate() {
|
||||||
|
if config.ui.show_engine_list_separator.unwrap() && i > 0 {
|
||||||
|
html.push_str(" · ");
|
||||||
|
}
|
||||||
|
let raw_engine_id = &engine.id();
|
||||||
|
let engine_id = if config.ui.show_engine_list_separator.unwrap() {
|
||||||
|
raw_engine_id.replace('_', " ")
|
||||||
|
} else {
|
||||||
|
raw_engine_id.to_string()
|
||||||
|
};
|
||||||
|
html.push_str(&html! { span.engine-list-item { (engine_id) } }.into_string())
|
||||||
|
}
|
||||||
|
html! {
|
||||||
|
div.engine-list {
|
||||||
|
(PreEscaped(html))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn route(
|
pub async fn route(
|
||||||
Query(params): Query<HashMap<String, String>>,
|
Query(params): Query<HashMap<String, String>>,
|
||||||
State(config): State<Arc<Config>>,
|
State(config): State<Arc<Config>>,
|
||||||
@ -197,8 +147,14 @@ pub async fn route(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let search_tab = params
|
||||||
|
.get("tab")
|
||||||
|
.and_then(|t| SearchTab::from_str(t).ok())
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
let query = SearchQuery {
|
let query = SearchQuery {
|
||||||
query,
|
query,
|
||||||
|
tab: search_tab,
|
||||||
request_headers: headers
|
request_headers: headers
|
||||||
.clone()
|
.clone()
|
||||||
.into_iter()
|
.into_iter()
|
||||||
@ -253,16 +209,11 @@ pub async fn route(
|
|||||||
|
|
||||||
second_part.push_str("</div>"); // close progress-updates
|
second_part.push_str("</div>"); // close progress-updates
|
||||||
second_part.push_str("<style>.progress-updates{display:none}</style>");
|
second_part.push_str("<style>.progress-updates{display:none}</style>");
|
||||||
second_part.push_str(&render_results(results).into_string());
|
second_part.push_str(&render_results_for_tab(results).into_string());
|
||||||
yield Ok(Bytes::from(second_part));
|
yield Ok(Bytes::from(second_part));
|
||||||
},
|
},
|
||||||
ProgressUpdateData::PostSearchInfobox(infobox) => {
|
ProgressUpdateData::PostSearchInfobox(infobox) => {
|
||||||
third_part.push_str(&html! {
|
third_part.push_str(&all::render_infobox(&infobox, &config).into_string());
|
||||||
div."infobox"."postsearch-infobox" {
|
|
||||||
(infobox.html)
|
|
||||||
(render_engine_list(&[infobox.engine], &config))
|
|
||||||
}
|
|
||||||
}.into_string());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
93
src/web/search/all.rs
Normal file
93
src/web/search/all.rs
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
//! Rendering results in the "all" tab.
|
||||||
|
|
||||||
|
use maud::{html, PreEscaped};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
config::Config,
|
||||||
|
engines::{self, EngineSearchResult, Infobox, Response},
|
||||||
|
web::search::render_engine_list,
|
||||||
|
};
|
||||||
|
|
||||||
|
pub fn render_results(response: Response) -> PreEscaped<String> {
|
||||||
|
let mut html = String::new();
|
||||||
|
if let Some(infobox) = &response.infobox {
|
||||||
|
html.push_str(
|
||||||
|
&html! {
|
||||||
|
div."infobox" {
|
||||||
|
(infobox.html)
|
||||||
|
(render_engine_list(&[infobox.engine], &response.config))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
.into_string(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if let Some(answer) = &response.answer {
|
||||||
|
html.push_str(
|
||||||
|
&html! {
|
||||||
|
div."answer" {
|
||||||
|
(answer.html)
|
||||||
|
(render_engine_list(&[answer.engine], &response.config))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
.into_string(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if let Some(featured_snippet) = &response.featured_snippet {
|
||||||
|
html.push_str(&render_featured_snippet(featured_snippet, &response.config).into_string());
|
||||||
|
}
|
||||||
|
for result in &response.search_results {
|
||||||
|
html.push_str(&render_search_result(result, &response.config).into_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
if html.is_empty() {
|
||||||
|
html.push_str(
|
||||||
|
&html! {
|
||||||
|
p { "No results." }
|
||||||
|
}
|
||||||
|
.into_string(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
PreEscaped(html)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn render_search_result(
|
||||||
|
result: &engines::SearchResult<EngineSearchResult>,
|
||||||
|
config: &Config,
|
||||||
|
) -> PreEscaped<String> {
|
||||||
|
html! {
|
||||||
|
div."search-result" {
|
||||||
|
a."search-result-anchor" rel="noreferrer" href=(result.result.url) {
|
||||||
|
span."search-result-url" { (result.result.url) }
|
||||||
|
h3."search-result-title" { (result.result.title) }
|
||||||
|
}
|
||||||
|
p."search-result-description" { (result.result.description) }
|
||||||
|
(render_engine_list(&result.engines.iter().copied().collect::<Vec<_>>(), config))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn render_featured_snippet(
|
||||||
|
featured_snippet: &engines::FeaturedSnippet,
|
||||||
|
config: &Config,
|
||||||
|
) -> PreEscaped<String> {
|
||||||
|
html! {
|
||||||
|
div."featured-snippet" {
|
||||||
|
p."search-result-description" { (featured_snippet.description) }
|
||||||
|
a."search-result-anchor" rel="noreferrer" href=(featured_snippet.url) {
|
||||||
|
span."search-result-url" { (featured_snippet.url) }
|
||||||
|
h3."search-result-title" { (featured_snippet.title) }
|
||||||
|
}
|
||||||
|
(render_engine_list(&[featured_snippet.engine], config))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn render_infobox(infobox: &Infobox, config: &Config) -> PreEscaped<String> {
|
||||||
|
html! {
|
||||||
|
div."infobox"."postsearch-infobox" {
|
||||||
|
(infobox.html)
|
||||||
|
(render_engine_list(&[infobox.engine], &config))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
48
src/web/search/images.rs
Normal file
48
src/web/search/images.rs
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
use maud::{html, PreEscaped};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
config::Config,
|
||||||
|
engines::{self, EngineImageResult, ImagesResponse},
|
||||||
|
web::search::render_engine_list,
|
||||||
|
};
|
||||||
|
|
||||||
|
pub fn render_results(response: ImagesResponse) -> PreEscaped<String> {
|
||||||
|
html! {
|
||||||
|
div.image-results {
|
||||||
|
@for image in &response.image_results {
|
||||||
|
(render_image_result(image, &response.config))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn render_image_result(
|
||||||
|
result: &engines::SearchResult<EngineImageResult>,
|
||||||
|
config: &Config,
|
||||||
|
) -> PreEscaped<String> {
|
||||||
|
let original_image_src = &result.result.image_url;
|
||||||
|
let image_src = if config.image_search.proxy.enabled.unwrap() {
|
||||||
|
// serialize url params
|
||||||
|
let escaped_param =
|
||||||
|
url::form_urlencoded::byte_serialize(original_image_src.as_bytes()).collect::<String>();
|
||||||
|
format!("/image-proxy?url={}", escaped_param)
|
||||||
|
} else {
|
||||||
|
original_image_src.to_string()
|
||||||
|
};
|
||||||
|
html! {
|
||||||
|
div.image-result {
|
||||||
|
a.image-result-anchor rel="noreferrer" href=(original_image_src) target="_blank" {
|
||||||
|
div.image-result-img-container {
|
||||||
|
img loading="lazy" src=(image_src) width=(result.result.width) height=(result.result.height);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
a.image-result-page-anchor href=(result.result.page_url) {
|
||||||
|
span.image-result-page-url.search-result-url { (result.result.page_url) }
|
||||||
|
span.image-result-title { (result.result.title) }
|
||||||
|
}
|
||||||
|
@if config.image_search.show_engines.unwrap() {
|
||||||
|
{(render_engine_list(&result.engines.iter().copied().collect::<Vec<_>>(), &config))}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user