support engine-specific configs

This commit is contained in:
mat 2024-04-12 21:50:38 -05:00
parent fec328522f
commit 052e266985
6 changed files with 34 additions and 49 deletions

15
README
View File

@ -2,6 +2,9 @@ a cute metasearch engine
it sources from google, bing, brave, and a few others.
it's written in rust using no templating engine and with as little client-side
javascript as possible.
there's a demo instance at https://s.matdoes.dev, but don't use it as your
default or rely on it, please (so i don't get rate-limited by google).
@ -15,15 +18,3 @@ run of metasearch2. alternatively, you can copy the example-config.toml in the
repo and rename it to config.toml.
the default port is port 28019.
CONTRIBUTING
it's written in rust using no templating engine and with as little client-side
javascript as possible.
FORKS
here's a probably incomplete list of maintained forks that add new features:
- https://github.com/mrcbax/metasearch2/tree/seo_spam
- https://git.shrecked.dev/Shrecknt/metasearch

View File

@ -4,6 +4,7 @@ bind = "0.0.0.0:28019"
google = { weight = 1.05 }
bing = { weight = 1.0 }
brave = { weight = 1.25 }
marginalia = { weight = 0.15 }
# etc
[engines.marginalia]
args = { profile = "corpo", js = "default", adtech = "default" }
weight = 0.15

View File

@ -33,21 +33,6 @@ macro_rules! engines {
};
}
/// Generates `Engine::weight`, mapping each listed variant to its weight.
///
/// Invoke with a comma-separated list of `Variant = weight` pairs; a trailing
/// comma is accepted. Any variant that is not listed gets a weight of `1.0`.
#[macro_export]
macro_rules! engine_weights {
    ($($variant:ident = $value:expr),* $(,)?) => {
        impl Engine {
            /// Returns the weight configured for this engine, or `1.0` when
            /// no weight was listed for it.
            #[must_use]
            pub fn weight(&self) -> f64 {
                match self {
                    $(Self::$variant => $value,)*
                    // Fallback for every variant the invocation left out.
                    _ => 1.0,
                }
            }
        }
    };
}
#[macro_export]
macro_rules! engine_parse_response {
($res:ident, $module:ident::$engine_id:ident::None) => {

View File

@ -4,6 +4,7 @@ use std::{
net::IpAddr,
ops::Deref,
str::FromStr,
sync::Arc,
time::Instant,
};
@ -90,6 +91,9 @@ pub struct SearchQuery {
pub query: String,
pub request_headers: HashMap<String, String>,
pub ip: String,
/// The config is part of the query so it's possible to make a query with a
/// custom config.
pub config: Arc<Config>,
}
impl Deref for SearchQuery {
@ -228,7 +232,6 @@ impl ProgressUpdate {
}
pub async fn search(
config: &Config,
query: &SearchQuery,
progress_tx: mpsc::UnboundedSender<ProgressUpdate>,
) -> eyre::Result<()> {
@ -238,7 +241,7 @@ pub async fn search(
let mut requests = Vec::new();
for &engine in Engine::all() {
let engine_config = config.engines.get(engine);
let engine_config = query.config.engines.get(engine);
if !engine_config.enabled {
continue;
}
@ -317,7 +320,7 @@ pub async fn search(
join_all(response_futures).await.into_iter().collect();
let responses = responses_result?;
let response = merge_engine_responses(config, responses);
let response = merge_engine_responses(&query.config, responses);
let has_infobox = response.infobox.is_some();
@ -331,7 +334,7 @@ pub async fn search(
let mut postsearch_requests = Vec::new();
for &engine in Engine::all() {
let engine_config = config.engines.get(engine);
let engine_config = query.config.engines.get(engine);
if !engine_config.enabled {
continue;
}

View File

@ -2,27 +2,22 @@ use reqwest::Url;
use serde::Deserialize;
use crate::{
engines::{EngineResponse, RequestResponse, CLIENT},
engines::{Engine, EngineResponse, RequestResponse, SearchQuery, CLIENT},
parse::{parse_html_response_with_opts, ParseOpts},
};
/// Marginalia-specific settings.
///
/// `request` builds this by converting the `extra` TOML table of the
/// Marginalia engine config (`toml::Value::Table(..).try_into()`).
#[derive(Deserialize)]
pub struct MarginaliaConfig {
/// Values forwarded as query parameters on the Marginalia search URL.
pub args: MarginaliaArgs,
}
/// Query-string arguments sent to `https://search.marginalia.nu/search`
/// alongside the search query itself.
#[derive(Deserialize)]
pub struct MarginaliaArgs {
/// Sent as the `profile` query parameter.
pub profile: String,
/// Sent as the `js` query parameter.
pub js: String,
/// Sent as the `adtech` query parameter.
pub adtech: String,
}
// NOTE(review): the fields set here (`profile`, `js`, `adtech`) are the ones
// declared on `MarginaliaArgs` in this view, while `MarginaliaConfig` only
// declares `args`. This impl looks like pre-change code that the commit
// removes — confirm before relying on it.
/// Default settings: profile `"corpo"`, with `js` and `adtech` both `"default"`.
impl Default for MarginaliaConfig {
fn default() -> Self {
Self {
profile: "corpo".to_string(),
js: "default".to_string(),
adtech: "default".to_string(),
}
}
}
pub fn request(query: &str) -> RequestResponse {
pub fn request(query: &SearchQuery) -> RequestResponse {
// if the query is more than 3 words or has any special characters then abort
if query.split_whitespace().count() > 3
|| !query.chars().all(|c| c.is_ascii_alphanumeric() || c == ' ')
@ -30,15 +25,24 @@ pub fn request(query: &str) -> RequestResponse {
return RequestResponse::None;
}
let config_toml = query.config.engines.get(Engine::Marginalia).extra.clone();
let config: MarginaliaConfig = match toml::Value::Table(config_toml).try_into() {
Ok(args) => args,
Err(err) => {
eprintln!("Failed to parse Marginalia config: {err}");
return RequestResponse::None;
}
};
CLIENT
.get(
Url::parse_with_params(
"https://search.marginalia.nu/search",
&[
("query", query),
("profile", "corpo"),
("js", "default"),
("adtech", "default"),
("query", query.query.as_str()),
("profile", config.args.profile.as_str()),
("js", config.args.js.as_str()),
("adtech", config.args.adtech.as_str()),
],
)
.unwrap(),

View File

@ -187,6 +187,7 @@ pub async fn route(
|| addr.ip().to_string(),
|ip| ip.to_str().unwrap_or_default().to_string(),
),
config,
};
let s = stream! {
@ -206,7 +207,7 @@ pub async fn route(
let (progress_tx, mut progress_rx) = tokio::sync::mpsc::unbounded_channel();
let search_future = tokio::spawn(async move { engines::search(&config, &query, progress_tx).await });
let search_future = tokio::spawn(async move { engines::search( &query, progress_tx).await });
while let Some(progress_update) = progress_rx.recv().await {
match progress_update.data {