add marginalia search

This commit is contained in:
mat 2023-12-31 00:07:40 -06:00
parent 05299fbe46
commit e42a146ce6
5 changed files with 119 additions and 0 deletions

71
Cargo.lock generated
View File

@ -39,6 +39,21 @@ dependencies = [
"memchr",
]
[[package]]
name = "alloc-no-stdlib"
version = "2.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3"
[[package]]
name = "alloc-stdlib"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece"
dependencies = [
"alloc-no-stdlib",
]
[[package]]
name = "ammonia"
version = "3.3.0"
@ -52,6 +67,20 @@ dependencies = [
"url",
]
[[package]]
name = "async-compression"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc2d0cfb2a7388d34f590e76686704c494ed7aaceed62ee1ba35cbf363abc2a5"
dependencies = [
"brotli",
"flate2",
"futures-core",
"memchr",
"pin-project-lite",
"tokio",
]
[[package]]
name = "async-stream"
version = "0.3.5"
@ -177,6 +206,27 @@ version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
[[package]]
name = "brotli"
version = "3.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
"brotli-decompressor",
]
[[package]]
name = "brotli-decompressor"
version = "2.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
]
[[package]]
name = "bumpalo"
version = "3.14.0"
@ -226,6 +276,15 @@ version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
[[package]]
name = "crc32fast"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
dependencies = [
"cfg-if",
]
[[package]]
name = "cssparser"
version = "0.31.2"
@ -312,6 +371,16 @@ version = "1.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e470ea3be6ce980f4d7f6cc08a6084e7715f2b052eeb1f123f2d4d8fb1d35de1"
[[package]]
name = "flate2"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "fnv"
version = "1.0.7"
@ -1095,6 +1164,7 @@ version = "0.11.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37b1ae8d9ac08420c66222fb9096fc5de435c3c48542bc5336c51892cffafb41"
dependencies = [
"async-compression",
"base64",
"bytes",
"encoding_rs",
@ -1120,6 +1190,7 @@ dependencies = [
"system-configuration",
"tokio",
"tokio-rustls",
"tokio-util",
"tower-service",
"url",
"wasm-bindgen",

View File

@ -25,6 +25,9 @@ rand = "0.8.5"
regex = "1.10.2"
reqwest = { version = "0.11.23", default-features = false, features = [
"rustls-tls",
"gzip",
"deflate",
"brotli",
] }
scraper = "0.18.1"
serde = { version = "1.0.193", features = ["derive"] }

View File

@ -22,6 +22,7 @@ pub enum Engine {
Google,
Bing,
Brave,
Marginalia,
// answer
Useragent,
Ip,
@ -39,6 +40,7 @@ impl Engine {
Engine::Google,
Engine::Bing,
Engine::Brave,
Engine::Marginalia,
Engine::Useragent,
Engine::Ip,
Engine::Calc,
@ -54,6 +56,7 @@ impl Engine {
Engine::Google => "google",
Engine::Bing => "bing",
Engine::Brave => "brave",
Engine::Marginalia => "marginalia",
Engine::Useragent => "useragent",
Engine::Ip => "ip",
Engine::Calc => "calc",
@ -69,6 +72,7 @@ impl Engine {
Engine::Google => 1.05,
Engine::Bing => 1.,
Engine::Brave => 1.25,
Engine::Marginalia => 0.3,
_ => 1.,
}
}
@ -78,6 +82,7 @@ impl Engine {
Engine::Google => search::google::request(query).into(),
Engine::Bing => search::bing::request(query).into(),
Engine::Brave => search::brave::request(query).into(),
Engine::Marginalia => search::marginalia::request(query).into(),
Engine::Useragent => answer::useragent::request(query).into(),
Engine::Ip => answer::ip::request(query).into(),
Engine::Calc => answer::calc::request(query).into(),
@ -91,6 +96,7 @@ impl Engine {
Engine::Google => search::google::parse_response(body),
Engine::Bing => search::bing::parse_response(body),
Engine::Brave => search::brave::parse_response(body),
Engine::Marginalia => search::marginalia::parse_response(body),
Engine::Wikipedia => answer::wikipedia::parse_response(body),
_ => eyre::bail!("engine {self:?} can't parse response"),
}

View File

@ -1,3 +1,4 @@
pub mod bing;
pub mod brave;
pub mod google;
pub mod marginalia;

View File

@ -0,0 +1,38 @@
use reqwest::Url;
use crate::{
engines::{EngineResponse, CLIENT},
parse::{parse_html_response_with_opts, ParseOpts},
};
/// Builds the GET request that submits `query` to the Marginalia search page.
///
/// The `profile`, `js` and `adtech` query parameters are all sent as
/// `"default"`. The request carries a fixed Firefox-style `User-Agent` and an
/// `Accept-Language` header preferring US English.
///
/// # Panics
///
/// Panics if the search URL fails to parse; the base URL is a constant
/// literal, so this is not expected in practice.
pub fn request(query: &str) -> reqwest::RequestBuilder {
    const USER_AGENT: &str =
        "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0";
    const ACCEPT_LANGUAGE: &str = "en-US,en;q=0.5";

    // `parse_with_params` takes care of encoding the user-supplied query text.
    let params = [
        ("query", query),
        ("profile", "default"),
        ("js", "default"),
        ("adtech", "default"),
    ];
    let url = Url::parse_with_params("https://search.marginalia.nu/search", &params).unwrap();

    let builder = CLIENT.get(url);
    builder
        .header("User-Agent", USER_AGENT)
        .header("Accept-Language", ACCEPT_LANGUAGE)
}
/// Parses the body of a Marginalia search response into an [`EngineResponse`].
///
/// The work is delegated to `parse_html_response_with_opts`, configured with
/// the selector strings for a result container, its title, its link and its
/// description.
///
/// # Errors
///
/// Returns whatever error `parse_html_response_with_opts` reports for `body`.
pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
    // Selector strings describing where each piece of a result lives in the page.
    let opts = ParseOpts::new()
        .result("section.search-result")
        .title("h2")
        .href("a[href]")
        .description("p.description");

    parse_html_response_with_opts(body, opts)
}