support engine-specific configs

This commit is contained in:
mat 2024-04-12 21:50:38 -05:00
parent fec328522f
commit 052e266985
6 changed files with 34 additions and 49 deletions

15
README
View File

@ -2,6 +2,9 @@ a cute metasearch engine
it sources from google, bing, brave, and a few others.
it's written in rust using no templating engine and with as little client-side
javascript as possible.
there's a demo instance at https://s.matdoes.dev, but don't use it as your
default or rely on it, please (so i don't get ratelimited by google).
@ -15,15 +18,3 @@ run of metasearch2. alternatively, you can copy the example-config.toml in the
repo and rename it to config.toml.
the default port is port 28019.
CONTRIBUTING
it's written in rust using no templating engine and with as little client-side
javascript as possible.
FORKS
here's a probably incomplete list of maintained forks that add new features:
- https://github.com/mrcbax/metasearch2/tree/seo_spam
- https://git.shrecked.dev/Shrecknt/metasearch

View File

@ -4,6 +4,7 @@ bind = "0.0.0.0:28019"
google = { weight = 1.05 } google = { weight = 1.05 }
bing = { weight = 1.0 } bing = { weight = 1.0 }
brave = { weight = 1.25 } brave = { weight = 1.25 }
marginalia = { weight = 0.15 }
# etc [engines.marginalia]
args = { profile = "corpo", js = "default", adtech = "default" }
weight = 0.15

View File

@ -33,21 +33,6 @@ macro_rules! engines {
}; };
} }
/// Generates an inherent `weight` method on `Engine`.
///
/// Takes a comma-separated list of `Variant = weight` pairs (a trailing comma
/// is allowed). Each listed variant returns its weight expression; any variant
/// that is not listed returns `1.`.
#[macro_export]
macro_rules! engine_weights {
($($engine:ident = $weight:expr),* $(,)?) => {
impl Engine {
// The result is marked `#[must_use]`, so ignoring it produces a warning.
#[must_use]
pub fn weight(&self) -> f64 {
match self {
// One arm is emitted per `Variant = weight` pair given to the macro.
$(Engine::$engine => $weight,)*
// Variants without an explicit entry fall back to a weight of 1.
_ => 1.,
}
}
}
};
}
#[macro_export] #[macro_export]
macro_rules! engine_parse_response { macro_rules! engine_parse_response {
($res:ident, $module:ident::$engine_id:ident::None) => { ($res:ident, $module:ident::$engine_id:ident::None) => {

View File

@ -4,6 +4,7 @@ use std::{
net::IpAddr, net::IpAddr,
ops::Deref, ops::Deref,
str::FromStr, str::FromStr,
sync::Arc,
time::Instant, time::Instant,
}; };
@ -90,6 +91,9 @@ pub struct SearchQuery {
pub query: String, pub query: String,
pub request_headers: HashMap<String, String>, pub request_headers: HashMap<String, String>,
pub ip: String, pub ip: String,
/// The config is part of the query so it's possible to make a query with a
/// custom config.
pub config: Arc<Config>,
} }
impl Deref for SearchQuery { impl Deref for SearchQuery {
@ -228,7 +232,6 @@ impl ProgressUpdate {
} }
pub async fn search( pub async fn search(
config: &Config,
query: &SearchQuery, query: &SearchQuery,
progress_tx: mpsc::UnboundedSender<ProgressUpdate>, progress_tx: mpsc::UnboundedSender<ProgressUpdate>,
) -> eyre::Result<()> { ) -> eyre::Result<()> {
@ -238,7 +241,7 @@ pub async fn search(
let mut requests = Vec::new(); let mut requests = Vec::new();
for &engine in Engine::all() { for &engine in Engine::all() {
let engine_config = config.engines.get(engine); let engine_config = query.config.engines.get(engine);
if !engine_config.enabled { if !engine_config.enabled {
continue; continue;
} }
@ -317,7 +320,7 @@ pub async fn search(
join_all(response_futures).await.into_iter().collect(); join_all(response_futures).await.into_iter().collect();
let responses = responses_result?; let responses = responses_result?;
let response = merge_engine_responses(config, responses); let response = merge_engine_responses(&query.config, responses);
let has_infobox = response.infobox.is_some(); let has_infobox = response.infobox.is_some();
@ -331,7 +334,7 @@ pub async fn search(
let mut postsearch_requests = Vec::new(); let mut postsearch_requests = Vec::new();
for &engine in Engine::all() { for &engine in Engine::all() {
let engine_config = config.engines.get(engine); let engine_config = query.config.engines.get(engine);
if !engine_config.enabled { if !engine_config.enabled {
continue; continue;
} }

View File

@ -2,27 +2,22 @@ use reqwest::Url;
use serde::Deserialize; use serde::Deserialize;
use crate::{ use crate::{
engines::{EngineResponse, RequestResponse, CLIENT}, engines::{Engine, EngineResponse, RequestResponse, SearchQuery, CLIENT},
parse::{parse_html_response_with_opts, ParseOpts}, parse::{parse_html_response_with_opts, ParseOpts},
}; };
#[derive(Deserialize)] #[derive(Deserialize)]
pub struct MarginaliaConfig { pub struct MarginaliaConfig {
pub args: MarginaliaArgs,
}
#[derive(Deserialize)]
pub struct MarginaliaArgs {
pub profile: String, pub profile: String,
pub js: String, pub js: String,
pub adtech: String, pub adtech: String,
} }
/// Default Marginalia settings: profile `"corpo"`, with `js` and `adtech`
/// both set to `"default"`.
impl Default for MarginaliaConfig {
// Builds a config with each field set to an owned copy of its default string.
fn default() -> Self {
Self {
profile: "corpo".to_string(),
js: "default".to_string(),
adtech: "default".to_string(),
}
}
}
pub fn request(query: &str) -> RequestResponse { pub fn request(query: &SearchQuery) -> RequestResponse {
// if the query is more than 3 words or has any special characters then abort // if the query is more than 3 words or has any special characters then abort
if query.split_whitespace().count() > 3 if query.split_whitespace().count() > 3
|| !query.chars().all(|c| c.is_ascii_alphanumeric() || c == ' ') || !query.chars().all(|c| c.is_ascii_alphanumeric() || c == ' ')
@ -30,15 +25,24 @@ pub fn request(query: &str) -> RequestResponse {
return RequestResponse::None; return RequestResponse::None;
} }
let config_toml = query.config.engines.get(Engine::Marginalia).extra.clone();
let config: MarginaliaConfig = match toml::Value::Table(config_toml).try_into() {
Ok(args) => args,
Err(err) => {
eprintln!("Failed to parse Marginalia config: {err}");
return RequestResponse::None;
}
};
CLIENT CLIENT
.get( .get(
Url::parse_with_params( Url::parse_with_params(
"https://search.marginalia.nu/search", "https://search.marginalia.nu/search",
&[ &[
("query", query), ("query", query.query.as_str()),
("profile", "corpo"), ("profile", config.args.profile.as_str()),
("js", "default"), ("js", config.args.js.as_str()),
("adtech", "default"), ("adtech", config.args.adtech.as_str()),
], ],
) )
.unwrap(), .unwrap(),

View File

@ -187,6 +187,7 @@ pub async fn route(
|| addr.ip().to_string(), || addr.ip().to_string(),
|ip| ip.to_str().unwrap_or_default().to_string(), |ip| ip.to_str().unwrap_or_default().to_string(),
), ),
config,
}; };
let s = stream! { let s = stream! {
@ -206,7 +207,7 @@ pub async fn route(
let (progress_tx, mut progress_rx) = tokio::sync::mpsc::unbounded_channel(); let (progress_tx, mut progress_rx) = tokio::sync::mpsc::unbounded_channel();
let search_future = tokio::spawn(async move { engines::search(&config, &query, progress_tx).await }); let search_future = tokio::spawn(async move { engines::search( &query, progress_tx).await });
while let Some(progress_update) = progress_rx.recv().await { while let Some(progress_update) = progress_rx.recv().await {
match progress_update.data { match progress_update.data {