add config
This commit is contained in:
parent
faccb3f45f
commit
fec328522f
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,3 +1,6 @@
|
|||||||
/target
|
/target
|
||||||
|
/config.toml
|
||||||
|
|
||||||
|
# convenience script i use for deploying the site to my server, feel free to
|
||||||
|
# write your own here too
|
||||||
/deploy.sh
|
/deploy.sh
|
||||||
|
53
Cargo.lock
generated
53
Cargo.lock
generated
@ -949,6 +949,7 @@ dependencies = [
|
|||||||
"serde_json",
|
"serde_json",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-stream",
|
"tokio-stream",
|
||||||
|
"toml",
|
||||||
"url",
|
"url",
|
||||||
"urlencoding",
|
"urlencoding",
|
||||||
]
|
]
|
||||||
@ -1467,6 +1468,15 @@ dependencies = [
|
|||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_spanned"
|
||||||
|
version = "0.6.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1"
|
||||||
|
dependencies = [
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "serde_urlencoded"
|
name = "serde_urlencoded"
|
||||||
version = "0.7.1"
|
version = "0.7.1"
|
||||||
@ -1694,6 +1704,40 @@ dependencies = [
|
|||||||
"tracing",
|
"tracing",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "toml"
|
||||||
|
version = "0.8.12"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e9dd1545e8208b4a5af1aa9bbd0b4cf7e9ea08fabc5d0a5c67fcaafa17433aa3"
|
||||||
|
dependencies = [
|
||||||
|
"serde",
|
||||||
|
"serde_spanned",
|
||||||
|
"toml_datetime",
|
||||||
|
"toml_edit",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "toml_datetime"
|
||||||
|
version = "0.6.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1"
|
||||||
|
dependencies = [
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "toml_edit"
|
||||||
|
version = "0.22.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8e40bb779c5187258fd7aad0eb68cb8706a0a81fa712fbea808ab43c4b8374c4"
|
||||||
|
dependencies = [
|
||||||
|
"indexmap",
|
||||||
|
"serde",
|
||||||
|
"serde_spanned",
|
||||||
|
"toml_datetime",
|
||||||
|
"winnow",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tower"
|
name = "tower"
|
||||||
version = "0.4.13"
|
version = "0.4.13"
|
||||||
@ -2061,6 +2105,15 @@ version = "0.52.4"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"
|
checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winnow"
|
||||||
|
version = "0.6.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f0c976aaaa0e1f90dbb21e9587cdaf1d9679a1cde8875c0d6bd83ab96a208352"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "winreg"
|
name = "winreg"
|
||||||
version = "0.50.0"
|
version = "0.50.0"
|
||||||
|
@ -37,5 +37,6 @@ serde = { version = "1.0.197", features = ["derive"] }
|
|||||||
serde_json = "1.0.114"
|
serde_json = "1.0.114"
|
||||||
tokio = { version = "1.36.0", features = ["rt", "macros"] }
|
tokio = { version = "1.36.0", features = ["rt", "macros"] }
|
||||||
tokio-stream = "0.1.15"
|
tokio-stream = "0.1.15"
|
||||||
|
toml = { version = "0.8.12", default-features = false, features = ["parse"] }
|
||||||
url = "2.5.0"
|
url = "2.5.0"
|
||||||
urlencoding = "2.1.3"
|
urlencoding = "2.1.3"
|
||||||
|
27
README
27
README
@ -5,22 +5,25 @@ it sources from google, bing, brave, and a few others.
|
|||||||
there's a demo instance at https://s.matdoes.dev, but don't use it as your
|
there's a demo instance at https://s.matdoes.dev, but don't use it as your
|
||||||
default or rely on it, please (so i don't get ratelimited by google).
|
default or rely on it, please (so i don't get ratelimited by google).
|
||||||
|
|
||||||
|
USAGE
|
||||||
|
|
||||||
|
build it with `cargo b -r`, the resulting binary will be at
|
||||||
|
`target/release/metasearch2`.
|
||||||
|
|
||||||
|
the config.toml file is created in your current working directory on the first
|
||||||
|
run of metasearch2. alternatively, you can copy the default-config.toml in the
|
||||||
|
repo and rename it to config.toml.
|
||||||
|
|
||||||
|
the default port is port 28019.
|
||||||
|
|
||||||
|
CONTRIBUTING
|
||||||
|
|
||||||
it's written in rust using no templating engine and with as little client-side
|
it's written in rust using no templating engine and with as little client-side
|
||||||
javascript as possible.
|
javascript as possible.
|
||||||
|
|
||||||
metasearch2 is a single binary with no cli, configuration file, or database.
|
FORKS
|
||||||
if you want to configure it (like to change the default port or weights of
|
|
||||||
engines) then you have to modify the source.
|
|
||||||
|
|
||||||
build it with `cargo b -r`, the resulting binary will be at
|
|
||||||
`target/release/metasearch2`. it runs on port 28019.
|
|
||||||
|
|
||||||
note that metasearch2 is primarily made for myself, so only features i actually
|
|
||||||
use will be merged. however i highly encourage you to fork it to add features
|
|
||||||
you want, and in fact that would make me very happy. also, the code is public
|
|
||||||
domain so you can do absolutely whatever you want with it.
|
|
||||||
|
|
||||||
here's a probably incomplete list of maintained forks that add new features:
|
here's a probably incomplete list of maintained forks that add new features:
|
||||||
|
|
||||||
- https://github.com/mrcbax/metasearch2/tree/seo_spam
|
- https://github.com/mrcbax/metasearch2/tree/seo_spam
|
||||||
- https://git.shrecked.dev/Shrecknt/metasearch
|
- https://git.shrecked.dev/Shrecknt/metasearch
|
||||||
|
|
||||||
|
9
default-config.toml
Normal file
9
default-config.toml
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
bind = "0.0.0.0:28019"
|
||||||
|
|
||||||
|
[engines]
|
||||||
|
google = { weight = 1.05 }
|
||||||
|
bing = { weight = 1.0 }
|
||||||
|
brave = { weight = 1.25 }
|
||||||
|
marginalia = { weight = 0.15 }
|
||||||
|
|
||||||
|
# etc
|
152
src/config.rs
Normal file
152
src/config.rs
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
use std::{collections::HashMap, fs, net::SocketAddr, path::Path};
|
||||||
|
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
|
use serde::Deserialize;
|
||||||
|
|
||||||
|
use crate::engines::Engine;
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub struct Config {
|
||||||
|
pub bind: SocketAddr,
|
||||||
|
pub engines: EnginesConfig,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Config {
|
||||||
|
pub fn read_or_create() -> eyre::Result<Self> {
|
||||||
|
let default_config_str = include_str!("../default-config.toml");
|
||||||
|
let default_config = toml::from_str(default_config_str)?;
|
||||||
|
|
||||||
|
let config_path = Path::new("config.toml");
|
||||||
|
if config_path.exists() {
|
||||||
|
let mut given_config = toml::from_str::<Config>(&fs::read_to_string(config_path)?)?;
|
||||||
|
given_config.update(default_config);
|
||||||
|
Ok(given_config)
|
||||||
|
} else {
|
||||||
|
println!("No config found, creating one at {config_path:?}");
|
||||||
|
fs::write(config_path, default_config_str)?;
|
||||||
|
Ok(default_config)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the current config with the given config. This is used to make it so
|
||||||
|
// the default-config.toml is always used as a fallback if the user decides to
|
||||||
|
// use the default for something.
|
||||||
|
pub fn update(&mut self, other: Self) {
|
||||||
|
self.bind = other.bind;
|
||||||
|
for (key, value) in other.engines.map {
|
||||||
|
if let Some(existing) = self.engines.map.get_mut(&key) {
|
||||||
|
existing.update(value);
|
||||||
|
} else {
|
||||||
|
self.engines.map.insert(key, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub struct EnginesConfig {
|
||||||
|
#[serde(flatten)]
|
||||||
|
pub map: HashMap<Engine, DefaultableEngineConfig>,
|
||||||
|
}
|
||||||
|
|
||||||
|
static DEFAULT_ENABLED_FULL_ENGINE_CONFIG: Lazy<FullEngineConfig> =
|
||||||
|
Lazy::new(FullEngineConfig::default);
|
||||||
|
static DEFAULT_DISABLED_FULL_ENGINE_CONFIG: Lazy<FullEngineConfig> =
|
||||||
|
Lazy::new(|| FullEngineConfig {
|
||||||
|
enabled: false,
|
||||||
|
..Default::default()
|
||||||
|
});
|
||||||
|
|
||||||
|
impl EnginesConfig {
|
||||||
|
pub fn get(&self, engine: Engine) -> &FullEngineConfig {
|
||||||
|
match self.map.get(&engine) {
|
||||||
|
Some(engine_config) => match engine_config {
|
||||||
|
DefaultableEngineConfig::Boolean(enabled) => {
|
||||||
|
if *enabled {
|
||||||
|
&DEFAULT_ENABLED_FULL_ENGINE_CONFIG
|
||||||
|
} else {
|
||||||
|
&DEFAULT_DISABLED_FULL_ENGINE_CONFIG
|
||||||
|
}
|
||||||
|
}
|
||||||
|
DefaultableEngineConfig::Full(full) => full,
|
||||||
|
},
|
||||||
|
None => &DEFAULT_ENABLED_FULL_ENGINE_CONFIG,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Clone)]
|
||||||
|
#[serde(untagged)]
|
||||||
|
pub enum DefaultableEngineConfig {
|
||||||
|
Boolean(bool),
|
||||||
|
Full(FullEngineConfig),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DefaultableEngineConfig {
|
||||||
|
pub fn update(&mut self, other: Self) {
|
||||||
|
match (self, other) {
|
||||||
|
(Self::Boolean(existing), Self::Boolean(other)) => *existing = other,
|
||||||
|
(Self::Full(existing), Self::Full(other)) => existing.update(other),
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for DefaultableEngineConfig {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::Boolean(true)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Clone)]
|
||||||
|
pub struct FullEngineConfig {
|
||||||
|
#[serde(default = "default_true")]
|
||||||
|
pub enabled: bool,
|
||||||
|
|
||||||
|
/// The priority of this engine relative to the other engines. The default
|
||||||
|
/// is 1, and a value of 0 is treated as the default.
|
||||||
|
#[serde(default)]
|
||||||
|
pub weight: f64,
|
||||||
|
/// Per-engine configs. These are parsed at request time.
|
||||||
|
#[serde(flatten)]
|
||||||
|
#[serde(default)]
|
||||||
|
pub extra: toml::Table,
|
||||||
|
}
|
||||||
|
|
||||||
|
// serde expects a function as the default, this just exists so "enabled" is
|
||||||
|
// always true by default
|
||||||
|
fn default_true() -> bool {
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<DefaultableEngineConfig> for FullEngineConfig {
|
||||||
|
fn from(config: DefaultableEngineConfig) -> Self {
|
||||||
|
match config {
|
||||||
|
DefaultableEngineConfig::Boolean(enabled) => Self {
|
||||||
|
enabled,
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
DefaultableEngineConfig::Full(full) => full,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for FullEngineConfig {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
enabled: true,
|
||||||
|
weight: 1.0,
|
||||||
|
extra: Default::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FullEngineConfig {
|
||||||
|
pub fn update(&mut self, other: Self) {
|
||||||
|
self.enabled = other.enabled;
|
||||||
|
if other.weight != 0. {
|
||||||
|
self.weight = other.weight;
|
||||||
|
}
|
||||||
|
self.extra = other.extra;
|
||||||
|
}
|
||||||
|
}
|
@ -19,6 +19,17 @@ macro_rules! engines {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl FromStr for Engine {
|
||||||
|
type Err = ();
|
||||||
|
|
||||||
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
|
match s {
|
||||||
|
$($id => Ok(Engine::$engine),)*
|
||||||
|
_ => Err(()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -10,11 +10,12 @@ use std::{
|
|||||||
use futures::future::join_all;
|
use futures::future::join_all;
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use reqwest::header::HeaderMap;
|
use reqwest::header::HeaderMap;
|
||||||
|
use serde::{Deserialize, Deserializer};
|
||||||
use tokio::sync::mpsc;
|
use tokio::sync::mpsc;
|
||||||
|
|
||||||
mod macros;
|
mod macros;
|
||||||
use crate::{
|
use crate::{
|
||||||
engine_autocomplete_requests, engine_postsearch_requests, engine_requests, engine_weights,
|
config::Config, engine_autocomplete_requests, engine_postsearch_requests, engine_requests,
|
||||||
engines,
|
engines,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -39,15 +40,7 @@ engines! {
|
|||||||
// post-search
|
// post-search
|
||||||
StackExchange = "stackexchange",
|
StackExchange = "stackexchange",
|
||||||
GitHub = "github",
|
GitHub = "github",
|
||||||
DocsRs = "docs.rs",
|
DocsRs = "docs_rs",
|
||||||
}
|
|
||||||
|
|
||||||
engine_weights! {
|
|
||||||
Google = 1.05,
|
|
||||||
Bing = 1.0,
|
|
||||||
Brave = 1.25,
|
|
||||||
Marginalia = 0.15,
|
|
||||||
// defaults to 1.0
|
|
||||||
}
|
}
|
||||||
|
|
||||||
engine_requests! {
|
engine_requests! {
|
||||||
@ -83,6 +76,16 @@ impl fmt::Display for Engine {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'de> Deserialize<'de> for Engine {
|
||||||
|
fn deserialize<D>(deserializer: D) -> Result<Engine, D::Error>
|
||||||
|
where
|
||||||
|
D: Deserializer<'de>,
|
||||||
|
{
|
||||||
|
let s = String::deserialize(deserializer)?;
|
||||||
|
Engine::from_str(&s).map_err(|_| serde::de::Error::custom(format!("invalid engine '{s}'")))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct SearchQuery {
|
pub struct SearchQuery {
|
||||||
pub query: String,
|
pub query: String,
|
||||||
pub request_headers: HashMap<String, String>,
|
pub request_headers: HashMap<String, String>,
|
||||||
@ -224,18 +227,23 @@ impl ProgressUpdate {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn search_with_engines(
|
pub async fn search(
|
||||||
engines: &[Engine],
|
config: &Config,
|
||||||
query: &SearchQuery,
|
query: &SearchQuery,
|
||||||
progress_tx: mpsc::UnboundedSender<ProgressUpdate>,
|
progress_tx: mpsc::UnboundedSender<ProgressUpdate>,
|
||||||
) -> eyre::Result<()> {
|
) -> eyre::Result<()> {
|
||||||
let start_time = Instant::now();
|
let start_time = Instant::now();
|
||||||
|
|
||||||
let mut requests = Vec::new();
|
let progress_tx = &progress_tx;
|
||||||
for engine in engines {
|
|
||||||
requests.push(async {
|
|
||||||
let engine = *engine;
|
|
||||||
|
|
||||||
|
let mut requests = Vec::new();
|
||||||
|
for &engine in Engine::all() {
|
||||||
|
let engine_config = config.engines.get(engine);
|
||||||
|
if !engine_config.enabled {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
requests.push(async move {
|
||||||
let request_response = engine.request(query);
|
let request_response = engine.request(query);
|
||||||
|
|
||||||
let response = match request_response {
|
let response = match request_response {
|
||||||
@ -309,7 +317,7 @@ pub async fn search_with_engines(
|
|||||||
join_all(response_futures).await.into_iter().collect();
|
join_all(response_futures).await.into_iter().collect();
|
||||||
let responses = responses_result?;
|
let responses = responses_result?;
|
||||||
|
|
||||||
let response = merge_engine_responses(responses);
|
let response = merge_engine_responses(config, responses);
|
||||||
|
|
||||||
let has_infobox = response.infobox.is_some();
|
let has_infobox = response.infobox.is_some();
|
||||||
|
|
||||||
@ -322,9 +330,14 @@ pub async fn search_with_engines(
|
|||||||
// post-search
|
// post-search
|
||||||
|
|
||||||
let mut postsearch_requests = Vec::new();
|
let mut postsearch_requests = Vec::new();
|
||||||
for engine in engines {
|
for &engine in Engine::all() {
|
||||||
|
let engine_config = config.engines.get(engine);
|
||||||
|
if !engine_config.enabled {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if let Some(request) = engine.postsearch_request(&response) {
|
if let Some(request) = engine.postsearch_request(&response) {
|
||||||
postsearch_requests.push(async {
|
postsearch_requests.push(async move {
|
||||||
let response = match request.send().await {
|
let response = match request.send().await {
|
||||||
Ok(mut res) => {
|
Ok(mut res) => {
|
||||||
let mut body_bytes = Vec::new();
|
let mut body_bytes = Vec::new();
|
||||||
@ -341,7 +354,7 @@ pub async fn search_with_engines(
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
Ok((*engine, response))
|
Ok((engine, response))
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -373,14 +386,16 @@ pub async fn search_with_engines(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn autocomplete_with_engines(
|
pub async fn autocomplete(config: &Config, query: &str) -> eyre::Result<Vec<String>> {
|
||||||
engines: &[Engine],
|
|
||||||
query: &str,
|
|
||||||
) -> eyre::Result<Vec<String>> {
|
|
||||||
let mut requests = Vec::new();
|
let mut requests = Vec::new();
|
||||||
for engine in engines {
|
for &engine in Engine::all() {
|
||||||
|
let config = config.engines.get(engine);
|
||||||
|
if !config.enabled {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if let Some(request) = engine.request_autocomplete(query) {
|
if let Some(request) = engine.request_autocomplete(query) {
|
||||||
requests.push(async {
|
requests.push(async move {
|
||||||
let response = match request {
|
let response = match request {
|
||||||
RequestAutocompleteResponse::Http(request) => {
|
RequestAutocompleteResponse::Http(request) => {
|
||||||
let res = request.send().await?;
|
let res = request.send().await?;
|
||||||
@ -389,7 +404,7 @@ pub async fn autocomplete_with_engines(
|
|||||||
}
|
}
|
||||||
RequestAutocompleteResponse::Instant(response) => response,
|
RequestAutocompleteResponse::Instant(response) => response,
|
||||||
};
|
};
|
||||||
Ok((*engine, response))
|
Ok((engine, response))
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -403,7 +418,7 @@ pub async fn autocomplete_with_engines(
|
|||||||
join_all(autocomplete_futures).await.into_iter().collect();
|
join_all(autocomplete_futures).await.into_iter().collect();
|
||||||
let autocomplete_results = autocomplete_results_result?;
|
let autocomplete_results = autocomplete_results_result?;
|
||||||
|
|
||||||
Ok(merge_autocomplete_responses(autocomplete_results))
|
Ok(merge_autocomplete_responses(config, autocomplete_results))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub static CLIENT: Lazy<reqwest::Client> = Lazy::new(|| {
|
pub static CLIENT: Lazy<reqwest::Client> = Lazy::new(|| {
|
||||||
@ -421,19 +436,6 @@ pub static CLIENT: Lazy<reqwest::Client> = Lazy::new(|| {
|
|||||||
.unwrap()
|
.unwrap()
|
||||||
});
|
});
|
||||||
|
|
||||||
pub async fn search(
|
|
||||||
query: SearchQuery,
|
|
||||||
progress_tx: mpsc::UnboundedSender<ProgressUpdate>,
|
|
||||||
) -> eyre::Result<()> {
|
|
||||||
let engines = Engine::all();
|
|
||||||
search_with_engines(engines, &query, progress_tx).await
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn autocomplete(query: &str) -> eyre::Result<Vec<String>> {
|
|
||||||
let engines = Engine::all();
|
|
||||||
autocomplete_with_engines(engines, query).await
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Response {
|
pub struct Response {
|
||||||
pub search_results: Vec<SearchResult>,
|
pub search_results: Vec<SearchResult>,
|
||||||
@ -471,18 +473,20 @@ pub struct Infobox {
|
|||||||
pub engine: Engine,
|
pub engine: Engine,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn merge_engine_responses(responses: HashMap<Engine, EngineResponse>) -> Response {
|
fn merge_engine_responses(config: &Config, responses: HashMap<Engine, EngineResponse>) -> Response {
|
||||||
let mut search_results: Vec<SearchResult> = Vec::new();
|
let mut search_results: Vec<SearchResult> = Vec::new();
|
||||||
let mut featured_snippet: Option<FeaturedSnippet> = None;
|
let mut featured_snippet: Option<FeaturedSnippet> = None;
|
||||||
let mut answer: Option<Answer> = None;
|
let mut answer: Option<Answer> = None;
|
||||||
let mut infobox: Option<Infobox> = None;
|
let mut infobox: Option<Infobox> = None;
|
||||||
|
|
||||||
for (engine, response) in responses {
|
for (engine, response) in responses {
|
||||||
|
let engine_config = config.engines.get(engine);
|
||||||
|
|
||||||
for (result_index, search_result) in response.search_results.into_iter().enumerate() {
|
for (result_index, search_result) in response.search_results.into_iter().enumerate() {
|
||||||
// position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a
|
// position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a
|
||||||
// score of 0.33, etc.
|
// score of 0.33, etc.
|
||||||
let base_result_score = 1. / (result_index + 1) as f64;
|
let base_result_score = 1. / (result_index + 1) as f64;
|
||||||
let result_score = base_result_score * engine.weight();
|
let result_score = base_result_score * engine_config.weight;
|
||||||
|
|
||||||
if let Some(existing_result) = search_results
|
if let Some(existing_result) = search_results
|
||||||
.iter_mut()
|
.iter_mut()
|
||||||
@ -490,11 +494,14 @@ fn merge_engine_responses(responses: HashMap<Engine, EngineResponse>) -> Respons
|
|||||||
{
|
{
|
||||||
// if the weight of this engine is higher than every other one then replace the
|
// if the weight of this engine is higher than every other one then replace the
|
||||||
// title and description
|
// title and description
|
||||||
if engine.weight()
|
if engine_config.weight
|
||||||
> existing_result
|
> existing_result
|
||||||
.engines
|
.engines
|
||||||
.iter()
|
.iter()
|
||||||
.map(Engine::weight)
|
.map(|&other_engine| {
|
||||||
|
let other_engine_config = config.engines.get(other_engine);
|
||||||
|
other_engine_config.weight
|
||||||
|
})
|
||||||
.max_by(|a, b| a.partial_cmp(b).unwrap())
|
.max_by(|a, b| a.partial_cmp(b).unwrap())
|
||||||
.unwrap_or(0.)
|
.unwrap_or(0.)
|
||||||
{
|
{
|
||||||
@ -517,9 +524,11 @@ fn merge_engine_responses(responses: HashMap<Engine, EngineResponse>) -> Respons
|
|||||||
|
|
||||||
if let Some(engine_featured_snippet) = response.featured_snippet {
|
if let Some(engine_featured_snippet) = response.featured_snippet {
|
||||||
// if it has a higher weight than the current featured snippet
|
// if it has a higher weight than the current featured snippet
|
||||||
let featured_snippet_weight =
|
let featured_snippet_weight = featured_snippet.as_ref().map_or(0., |s| {
|
||||||
featured_snippet.as_ref().map_or(0., |s| s.engine.weight());
|
let other_engine_config = config.engines.get(s.engine);
|
||||||
if engine.weight() > featured_snippet_weight {
|
other_engine_config.weight
|
||||||
|
});
|
||||||
|
if engine_config.weight > featured_snippet_weight {
|
||||||
featured_snippet = Some(FeaturedSnippet {
|
featured_snippet = Some(FeaturedSnippet {
|
||||||
url: engine_featured_snippet.url,
|
url: engine_featured_snippet.url,
|
||||||
title: engine_featured_snippet.title,
|
title: engine_featured_snippet.title,
|
||||||
@ -531,8 +540,11 @@ fn merge_engine_responses(responses: HashMap<Engine, EngineResponse>) -> Respons
|
|||||||
|
|
||||||
if let Some(engine_answer_html) = response.answer_html {
|
if let Some(engine_answer_html) = response.answer_html {
|
||||||
// if it has a higher weight than the current answer
|
// if it has a higher weight than the current answer
|
||||||
let answer_weight = answer.as_ref().map_or(0., |s| s.engine.weight());
|
let answer_weight = answer.as_ref().map_or(0., |s| {
|
||||||
if engine.weight() > answer_weight {
|
let other_engine_config = config.engines.get(s.engine);
|
||||||
|
other_engine_config.weight
|
||||||
|
});
|
||||||
|
if engine_config.weight > answer_weight {
|
||||||
answer = Some(Answer {
|
answer = Some(Answer {
|
||||||
html: engine_answer_html,
|
html: engine_answer_html,
|
||||||
engine,
|
engine,
|
||||||
@ -542,8 +554,11 @@ fn merge_engine_responses(responses: HashMap<Engine, EngineResponse>) -> Respons
|
|||||||
|
|
||||||
if let Some(engine_infobox_html) = response.infobox_html {
|
if let Some(engine_infobox_html) = response.infobox_html {
|
||||||
// if it has a higher weight than the current infobox
|
// if it has a higher weight than the current infobox
|
||||||
let infobox_weight = infobox.as_ref().map_or(0., |s| s.engine.weight());
|
let infobox_weight = infobox.as_ref().map_or(0., |s| {
|
||||||
if engine.weight() > infobox_weight {
|
let other_engine_config = config.engines.get(s.engine);
|
||||||
|
other_engine_config.weight
|
||||||
|
});
|
||||||
|
if engine_config.weight > infobox_weight {
|
||||||
infobox = Some(Infobox {
|
infobox = Some(Infobox {
|
||||||
html: engine_infobox_html,
|
html: engine_infobox_html,
|
||||||
engine,
|
engine,
|
||||||
@ -567,15 +582,20 @@ pub struct AutocompleteResult {
|
|||||||
pub score: f64,
|
pub score: f64,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn merge_autocomplete_responses(responses: HashMap<Engine, Vec<String>>) -> Vec<String> {
|
fn merge_autocomplete_responses(
|
||||||
|
config: &Config,
|
||||||
|
responses: HashMap<Engine, Vec<String>>,
|
||||||
|
) -> Vec<String> {
|
||||||
let mut autocomplete_results: Vec<AutocompleteResult> = Vec::new();
|
let mut autocomplete_results: Vec<AutocompleteResult> = Vec::new();
|
||||||
|
|
||||||
for (engine, response) in responses {
|
for (engine, response) in responses {
|
||||||
|
let engine_config = config.engines.get(engine);
|
||||||
|
|
||||||
for (result_index, autocomplete_result) in response.into_iter().enumerate() {
|
for (result_index, autocomplete_result) in response.into_iter().enumerate() {
|
||||||
// position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a
|
// position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a
|
||||||
// score of 0.33, etc.
|
// score of 0.33, etc.
|
||||||
let base_result_score = 1. / (result_index + 1) as f64;
|
let base_result_score = 1. / (result_index + 1) as f64;
|
||||||
let result_score = base_result_score * engine.weight();
|
let result_score = base_result_score * engine_config.weight;
|
||||||
|
|
||||||
if let Some(existing_result) = autocomplete_results
|
if let Some(existing_result) = autocomplete_results
|
||||||
.iter_mut()
|
.iter_mut()
|
||||||
|
@ -1,10 +1,27 @@
|
|||||||
use reqwest::Url;
|
use reqwest::Url;
|
||||||
|
use serde::Deserialize;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
engines::{EngineResponse, RequestResponse, CLIENT},
|
engines::{EngineResponse, RequestResponse, CLIENT},
|
||||||
parse::{parse_html_response_with_opts, ParseOpts},
|
parse::{parse_html_response_with_opts, ParseOpts},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub struct MarginaliaConfig {
|
||||||
|
pub profile: String,
|
||||||
|
pub js: String,
|
||||||
|
pub adtech: String,
|
||||||
|
}
|
||||||
|
impl Default for MarginaliaConfig {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
profile: "corpo".to_string(),
|
||||||
|
js: "default".to_string(),
|
||||||
|
adtech: "default".to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn request(query: &str) -> RequestResponse {
|
pub fn request(query: &str) -> RequestResponse {
|
||||||
// if the query is more than 3 words or has any special characters then abort
|
// if the query is more than 3 words or has any special characters then abort
|
||||||
if query.split_whitespace().count() > 3
|
if query.split_whitespace().count() > 3
|
||||||
|
12
src/main.rs
12
src/main.rs
@ -1,3 +1,6 @@
|
|||||||
|
use config::Config;
|
||||||
|
|
||||||
|
pub mod config;
|
||||||
pub mod engines;
|
pub mod engines;
|
||||||
pub mod normalize;
|
pub mod normalize;
|
||||||
pub mod parse;
|
pub mod parse;
|
||||||
@ -5,5 +8,12 @@ pub mod web;
|
|||||||
|
|
||||||
#[tokio::main(flavor = "current_thread")]
|
#[tokio::main(flavor = "current_thread")]
|
||||||
async fn main() {
|
async fn main() {
|
||||||
web::run().await;
|
let config = match Config::read_or_create() {
|
||||||
|
Ok(config) => config,
|
||||||
|
Err(err) => {
|
||||||
|
eprintln!("Couldn't parse config:\n{err}");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
web::run(config).await;
|
||||||
}
|
}
|
||||||
|
@ -1,17 +1,25 @@
|
|||||||
use std::collections::HashMap;
|
use std::{collections::HashMap, sync::Arc};
|
||||||
|
|
||||||
use axum::{extract::Query, http::StatusCode, response::IntoResponse, Json};
|
use axum::{
|
||||||
|
extract::{Query, State},
|
||||||
|
http::StatusCode,
|
||||||
|
response::IntoResponse,
|
||||||
|
Json,
|
||||||
|
};
|
||||||
|
|
||||||
use crate::engines;
|
use crate::{config::Config, engines};
|
||||||
|
|
||||||
pub async fn route(Query(params): Query<HashMap<String, String>>) -> impl IntoResponse {
|
pub async fn route(
|
||||||
|
Query(params): Query<HashMap<String, String>>,
|
||||||
|
State(config): State<Arc<Config>>,
|
||||||
|
) -> impl IntoResponse {
|
||||||
let query = params
|
let query = params
|
||||||
.get("q")
|
.get("q")
|
||||||
.cloned()
|
.cloned()
|
||||||
.unwrap_or_default()
|
.unwrap_or_default()
|
||||||
.replace('\n', " ");
|
.replace('\n', " ");
|
||||||
|
|
||||||
let res = match engines::autocomplete(&query).await {
|
let res = match engines::autocomplete(&config, &query).await {
|
||||||
Ok(res) => res,
|
Ok(res) => res,
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
eprintln!("Autocomplete error for {query}: {err}");
|
eprintln!("Autocomplete error for {query}: {err}");
|
||||||
|
@ -2,13 +2,15 @@ pub mod autocomplete;
|
|||||||
pub mod opensearch;
|
pub mod opensearch;
|
||||||
pub mod search;
|
pub mod search;
|
||||||
|
|
||||||
use std::net::SocketAddr;
|
use std::{net::SocketAddr, sync::Arc};
|
||||||
|
|
||||||
use axum::{http::header, routing::get, Router};
|
use axum::{http::header, routing::get, Router};
|
||||||
|
|
||||||
pub const BIND_ADDRESS: &str = "0.0.0.0:28019";
|
use crate::config::Config;
|
||||||
|
|
||||||
|
pub async fn run(config: Config) {
|
||||||
|
let bind_addr = config.bind;
|
||||||
|
|
||||||
pub async fn run() {
|
|
||||||
let app = Router::new()
|
let app = Router::new()
|
||||||
.route(
|
.route(
|
||||||
"/",
|
"/",
|
||||||
@ -48,11 +50,12 @@ pub async fn run() {
|
|||||||
)
|
)
|
||||||
.route("/opensearch.xml", get(opensearch::route))
|
.route("/opensearch.xml", get(opensearch::route))
|
||||||
.route("/search", get(search::route))
|
.route("/search", get(search::route))
|
||||||
.route("/autocomplete", get(autocomplete::route));
|
.route("/autocomplete", get(autocomplete::route))
|
||||||
|
.with_state(Arc::new(config));
|
||||||
|
|
||||||
println!("Listening on {BIND_ADDRESS}");
|
println!("Listening on {bind_addr}");
|
||||||
|
|
||||||
let listener = tokio::net::TcpListener::bind(BIND_ADDRESS).await.unwrap();
|
let listener = tokio::net::TcpListener::bind(bind_addr).await.unwrap();
|
||||||
axum::serve(
|
axum::serve(
|
||||||
listener,
|
listener,
|
||||||
app.into_make_service_with_connect_info::<SocketAddr>(),
|
app.into_make_service_with_connect_info::<SocketAddr>(),
|
||||||
|
@ -1,17 +1,18 @@
|
|||||||
use std::{collections::HashMap, net::SocketAddr};
|
use std::{collections::HashMap, net::SocketAddr, sync::Arc};
|
||||||
|
|
||||||
use async_stream::stream;
|
use async_stream::stream;
|
||||||
use axum::{
|
use axum::{
|
||||||
body::Body,
|
body::Body,
|
||||||
extract::{ConnectInfo, Query},
|
extract::{ConnectInfo, Query, State},
|
||||||
http::{header, HeaderMap, StatusCode},
|
http::{header, HeaderMap, StatusCode},
|
||||||
response::IntoResponse,
|
response::IntoResponse,
|
||||||
};
|
};
|
||||||
use bytes::Bytes;
|
use bytes::Bytes;
|
||||||
use html_escape::{encode_text, encode_unquoted_attribute};
|
use html_escape::{encode_text, encode_unquoted_attribute};
|
||||||
|
|
||||||
use crate::engines::{
|
use crate::{
|
||||||
self, Engine, EngineProgressUpdate, ProgressUpdateData, Response, SearchQuery,
|
config::Config,
|
||||||
|
engines::{self, Engine, EngineProgressUpdate, ProgressUpdateData, Response, SearchQuery},
|
||||||
};
|
};
|
||||||
|
|
||||||
fn render_beginning_of_html(query: &str) -> String {
|
fn render_beginning_of_html(query: &str) -> String {
|
||||||
@ -144,6 +145,7 @@ fn render_engine_progress_update(
|
|||||||
|
|
||||||
pub async fn route(
|
pub async fn route(
|
||||||
Query(params): Query<HashMap<String, String>>,
|
Query(params): Query<HashMap<String, String>>,
|
||||||
|
State(config): State<Arc<Config>>,
|
||||||
headers: HeaderMap,
|
headers: HeaderMap,
|
||||||
ConnectInfo(addr): ConnectInfo<SocketAddr>,
|
ConnectInfo(addr): ConnectInfo<SocketAddr>,
|
||||||
) -> impl IntoResponse {
|
) -> impl IntoResponse {
|
||||||
@ -204,7 +206,7 @@ pub async fn route(
|
|||||||
|
|
||||||
let (progress_tx, mut progress_rx) = tokio::sync::mpsc::unbounded_channel();
|
let (progress_tx, mut progress_rx) = tokio::sync::mpsc::unbounded_channel();
|
||||||
|
|
||||||
let search_future = tokio::spawn(async move { engines::search(query, progress_tx).await });
|
let search_future = tokio::spawn(async move { engines::search(&config, &query, progress_tx).await });
|
||||||
|
|
||||||
while let Some(progress_update) = progress_rx.recv().await {
|
while let Some(progress_update) = progress_rx.recv().await {
|
||||||
match progress_update.data {
|
match progress_update.data {
|
||||||
|
Loading…
Reference in New Issue
Block a user