From 052e266985a219e3b2c264c568b7cc1bd9b9c969 Mon Sep 17 00:00:00 2001 From: mat Date: Fri, 12 Apr 2024 21:50:38 -0500 Subject: [PATCH] support engine-specific configs --- README | 15 +++----------- default-config.toml | 5 +++-- src/engines/macros.rs | 15 -------------- src/engines/mod.rs | 11 +++++++---- src/engines/search/marginalia.rs | 34 ++++++++++++++++++-------------- src/web/search.rs | 3 ++- 6 files changed, 34 insertions(+), 49 deletions(-) diff --git a/README b/README index 37019c9..637ddfc 100644 --- a/README +++ b/README @@ -2,6 +2,9 @@ a cute metasearch engine it sources from google, bing, brave, and a few others. +it's written in rust using no templating engine and with as little client-side +javascript as possible. + there's a demo instance at https://s.matdoes.dev, but don't use it as your default or rely on it, please (so i don't get ratelimited by google). @@ -15,15 +18,3 @@ run of metasearch2. alternatively, you can copy the example-config.toml in the repo and rename it to config.toml. the default port is port 28019. - -CONTRIBUTING - -it's written in rust using no templating engine and with as little client-side -javascript as possible. - -FORKS - -here's a probably incomplete list of maintained forks that add new features: - -- https://github.com/mrcbax/metasearch2/tree/seo_spam -- https://git.shrecked.dev/Shrecknt/metasearch diff --git a/default-config.toml b/default-config.toml index b7b43a5..acfd3c6 100644 --- a/default-config.toml +++ b/default-config.toml @@ -4,6 +4,7 @@ bind = "0.0.0.0:28019" google = { weight = 1.05 } bing = { weight = 1.0 } brave = { weight = 1.25 } -marginalia = { weight = 0.15 } -# etc +[engines.marginalia] +args = { profile = "corpo", js = "default", adtech = "default" } +weight = 0.15 diff --git a/src/engines/macros.rs b/src/engines/macros.rs index ea3737e..c285a71 100644 --- a/src/engines/macros.rs +++ b/src/engines/macros.rs @@ -33,21 +33,6 @@ macro_rules! engines { }; } -#[macro_export] -macro_rules! 
engine_weights { - ($($engine:ident = $weight:expr),* $(,)?) => { - impl Engine { - #[must_use] - pub fn weight(&self) -> f64 { - match self { - $(Engine::$engine => $weight,)* - _ => 1., - } - } - } - }; -} - #[macro_export] macro_rules! engine_parse_response { ($res:ident, $module:ident::$engine_id:ident::None) => { diff --git a/src/engines/mod.rs b/src/engines/mod.rs index 2c3737b..fba4a2e 100644 --- a/src/engines/mod.rs +++ b/src/engines/mod.rs @@ -4,6 +4,7 @@ use std::{ net::IpAddr, ops::Deref, str::FromStr, + sync::Arc, time::Instant, }; @@ -90,6 +91,9 @@ pub struct SearchQuery { pub query: String, pub request_headers: HashMap<String, String>, pub ip: String, + /// The config is part of the query so it's possible to make a query with a + /// custom config. + pub config: Arc<Config>, } impl Deref for SearchQuery { @@ -228,7 +232,6 @@ impl ProgressUpdate { } pub async fn search( - config: &Config, query: &SearchQuery, progress_tx: mpsc::UnboundedSender<ProgressUpdate>, ) -> eyre::Result<()> { @@ -238,7 +241,7 @@ pub async fn search( let mut requests = Vec::new(); for &engine in Engine::all() { - let engine_config = config.engines.get(engine); + let engine_config = query.config.engines.get(engine); if !engine_config.enabled { continue; } @@ -317,7 +320,7 @@ pub async fn search( join_all(response_futures).await.into_iter().collect(); let responses = responses_result?; - let response = merge_engine_responses(config, responses); + let response = merge_engine_responses(&query.config, responses); let has_infobox = response.infobox.is_some(); @@ -331,7 +334,7 @@ pub async fn search( let mut postsearch_requests = Vec::new(); for &engine in Engine::all() { - let engine_config = config.engines.get(engine); + let engine_config = query.config.engines.get(engine); if !engine_config.enabled { continue; } diff --git a/src/engines/search/marginalia.rs b/src/engines/search/marginalia.rs index a9907c4..f91b483 100644 --- a/src/engines/search/marginalia.rs +++ b/src/engines/search/marginalia.rs @@ -2,27 +2,22 @@ use 
reqwest::Url; use serde::Deserialize; use crate::{ - engines::{EngineResponse, RequestResponse, CLIENT}, + engines::{Engine, EngineResponse, RequestResponse, SearchQuery, CLIENT}, parse::{parse_html_response_with_opts, ParseOpts}, }; #[derive(Deserialize)] pub struct MarginaliaConfig { + pub args: MarginaliaArgs, +} +#[derive(Deserialize)] +pub struct MarginaliaArgs { pub profile: String, pub js: String, pub adtech: String, } -impl Default for MarginaliaConfig { - fn default() -> Self { - Self { - profile: "corpo".to_string(), - js: "default".to_string(), - adtech: "default".to_string(), - } - } -} -pub fn request(query: &str) -> RequestResponse { +pub fn request(query: &SearchQuery) -> RequestResponse { // if the query is more than 3 words or has any special characters then abort if query.split_whitespace().count() > 3 || !query.chars().all(|c| c.is_ascii_alphanumeric() || c == ' ') @@ -30,15 +25,24 @@ pub fn request(query: &str) -> RequestResponse { return RequestResponse::None; } + let config_toml = query.config.engines.get(Engine::Marginalia).extra.clone(); + let config: MarginaliaConfig = match toml::Value::Table(config_toml).try_into() { + Ok(args) => args, + Err(err) => { + eprintln!("Failed to parse Marginalia config: {err}"); + return RequestResponse::None; + } + }; + CLIENT .get( Url::parse_with_params( "https://search.marginalia.nu/search", &[ - ("query", query), - ("profile", "corpo"), - ("js", "default"), - ("adtech", "default"), + ("query", query.query.as_str()), + ("profile", config.args.profile.as_str()), + ("js", config.args.js.as_str()), + ("adtech", config.args.adtech.as_str()), ], ) .unwrap(), diff --git a/src/web/search.rs b/src/web/search.rs index 342eeb5..ca84a4a 100644 --- a/src/web/search.rs +++ b/src/web/search.rs @@ -187,6 +187,7 @@ pub async fn route( || addr.ip().to_string(), |ip| ip.to_str().unwrap_or_default().to_string(), ), + config, }; let s = stream! 
{ @@ -206,7 +207,7 @@ pub async fn route( let (progress_tx, mut progress_rx) = tokio::sync::mpsc::unbounded_channel(); - let search_future = tokio::spawn(async move { engines::search(&config, &query, progress_tx).await }); + let search_future = tokio::spawn(async move { engines::search(&query, progress_tx).await }); while let Some(progress_update) = progress_rx.recv().await { match progress_update.data {