add marginalia search
This commit is contained in:
parent
05299fbe46
commit
e42a146ce6
71
Cargo.lock
generated
71
Cargo.lock
generated
@ -39,6 +39,21 @@ dependencies = [
|
|||||||
"memchr",
|
"memchr",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "alloc-no-stdlib"
|
||||||
|
version = "2.0.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "alloc-stdlib"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece"
|
||||||
|
dependencies = [
|
||||||
|
"alloc-no-stdlib",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ammonia"
|
name = "ammonia"
|
||||||
version = "3.3.0"
|
version = "3.3.0"
|
||||||
@ -52,6 +67,20 @@ dependencies = [
|
|||||||
"url",
|
"url",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "async-compression"
|
||||||
|
version = "0.4.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bc2d0cfb2a7388d34f590e76686704c494ed7aaceed62ee1ba35cbf363abc2a5"
|
||||||
|
dependencies = [
|
||||||
|
"brotli",
|
||||||
|
"flate2",
|
||||||
|
"futures-core",
|
||||||
|
"memchr",
|
||||||
|
"pin-project-lite",
|
||||||
|
"tokio",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "async-stream"
|
name = "async-stream"
|
||||||
version = "0.3.5"
|
version = "0.3.5"
|
||||||
@ -177,6 +206,27 @@ version = "2.4.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
|
checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "brotli"
|
||||||
|
version = "3.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f"
|
||||||
|
dependencies = [
|
||||||
|
"alloc-no-stdlib",
|
||||||
|
"alloc-stdlib",
|
||||||
|
"brotli-decompressor",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "brotli-decompressor"
|
||||||
|
version = "2.5.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f"
|
||||||
|
dependencies = [
|
||||||
|
"alloc-no-stdlib",
|
||||||
|
"alloc-stdlib",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bumpalo"
|
name = "bumpalo"
|
||||||
version = "3.14.0"
|
version = "3.14.0"
|
||||||
@ -226,6 +276,15 @@ version = "0.8.6"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
|
checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crc32fast"
|
||||||
|
version = "1.3.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cssparser"
|
name = "cssparser"
|
||||||
version = "0.31.2"
|
version = "0.31.2"
|
||||||
@ -312,6 +371,16 @@ version = "1.3.3"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e470ea3be6ce980f4d7f6cc08a6084e7715f2b052eeb1f123f2d4d8fb1d35de1"
|
checksum = "e470ea3be6ce980f4d7f6cc08a6084e7715f2b052eeb1f123f2d4d8fb1d35de1"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "flate2"
|
||||||
|
version = "1.0.28"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e"
|
||||||
|
dependencies = [
|
||||||
|
"crc32fast",
|
||||||
|
"miniz_oxide",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fnv"
|
name = "fnv"
|
||||||
version = "1.0.7"
|
version = "1.0.7"
|
||||||
@ -1095,6 +1164,7 @@ version = "0.11.23"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "37b1ae8d9ac08420c66222fb9096fc5de435c3c48542bc5336c51892cffafb41"
|
checksum = "37b1ae8d9ac08420c66222fb9096fc5de435c3c48542bc5336c51892cffafb41"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"async-compression",
|
||||||
"base64",
|
"base64",
|
||||||
"bytes",
|
"bytes",
|
||||||
"encoding_rs",
|
"encoding_rs",
|
||||||
@ -1120,6 +1190,7 @@ dependencies = [
|
|||||||
"system-configuration",
|
"system-configuration",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-rustls",
|
"tokio-rustls",
|
||||||
|
"tokio-util",
|
||||||
"tower-service",
|
"tower-service",
|
||||||
"url",
|
"url",
|
||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
|
@ -25,6 +25,9 @@ rand = "0.8.5"
|
|||||||
regex = "1.10.2"
|
regex = "1.10.2"
|
||||||
reqwest = { version = "0.11.23", default-features = false, features = [
|
reqwest = { version = "0.11.23", default-features = false, features = [
|
||||||
"rustls-tls",
|
"rustls-tls",
|
||||||
|
"gzip",
|
||||||
|
"deflate",
|
||||||
|
"brotli",
|
||||||
] }
|
] }
|
||||||
scraper = "0.18.1"
|
scraper = "0.18.1"
|
||||||
serde = { version = "1.0.193", features = ["derive"] }
|
serde = { version = "1.0.193", features = ["derive"] }
|
||||||
|
@ -22,6 +22,7 @@ pub enum Engine {
|
|||||||
Google,
|
Google,
|
||||||
Bing,
|
Bing,
|
||||||
Brave,
|
Brave,
|
||||||
|
Marginalia,
|
||||||
// answer
|
// answer
|
||||||
Useragent,
|
Useragent,
|
||||||
Ip,
|
Ip,
|
||||||
@ -39,6 +40,7 @@ impl Engine {
|
|||||||
Engine::Google,
|
Engine::Google,
|
||||||
Engine::Bing,
|
Engine::Bing,
|
||||||
Engine::Brave,
|
Engine::Brave,
|
||||||
|
Engine::Marginalia,
|
||||||
Engine::Useragent,
|
Engine::Useragent,
|
||||||
Engine::Ip,
|
Engine::Ip,
|
||||||
Engine::Calc,
|
Engine::Calc,
|
||||||
@ -54,6 +56,7 @@ impl Engine {
|
|||||||
Engine::Google => "google",
|
Engine::Google => "google",
|
||||||
Engine::Bing => "bing",
|
Engine::Bing => "bing",
|
||||||
Engine::Brave => "brave",
|
Engine::Brave => "brave",
|
||||||
|
Engine::Marginalia => "marginalia",
|
||||||
Engine::Useragent => "useragent",
|
Engine::Useragent => "useragent",
|
||||||
Engine::Ip => "ip",
|
Engine::Ip => "ip",
|
||||||
Engine::Calc => "calc",
|
Engine::Calc => "calc",
|
||||||
@ -69,6 +72,7 @@ impl Engine {
|
|||||||
Engine::Google => 1.05,
|
Engine::Google => 1.05,
|
||||||
Engine::Bing => 1.,
|
Engine::Bing => 1.,
|
||||||
Engine::Brave => 1.25,
|
Engine::Brave => 1.25,
|
||||||
|
Engine::Marginalia => 0.3,
|
||||||
_ => 1.,
|
_ => 1.,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -78,6 +82,7 @@ impl Engine {
|
|||||||
Engine::Google => search::google::request(query).into(),
|
Engine::Google => search::google::request(query).into(),
|
||||||
Engine::Bing => search::bing::request(query).into(),
|
Engine::Bing => search::bing::request(query).into(),
|
||||||
Engine::Brave => search::brave::request(query).into(),
|
Engine::Brave => search::brave::request(query).into(),
|
||||||
|
Engine::Marginalia => search::marginalia::request(query).into(),
|
||||||
Engine::Useragent => answer::useragent::request(query).into(),
|
Engine::Useragent => answer::useragent::request(query).into(),
|
||||||
Engine::Ip => answer::ip::request(query).into(),
|
Engine::Ip => answer::ip::request(query).into(),
|
||||||
Engine::Calc => answer::calc::request(query).into(),
|
Engine::Calc => answer::calc::request(query).into(),
|
||||||
@ -91,6 +96,7 @@ impl Engine {
|
|||||||
Engine::Google => search::google::parse_response(body),
|
Engine::Google => search::google::parse_response(body),
|
||||||
Engine::Bing => search::bing::parse_response(body),
|
Engine::Bing => search::bing::parse_response(body),
|
||||||
Engine::Brave => search::brave::parse_response(body),
|
Engine::Brave => search::brave::parse_response(body),
|
||||||
|
Engine::Marginalia => search::marginalia::parse_response(body),
|
||||||
Engine::Wikipedia => answer::wikipedia::parse_response(body),
|
Engine::Wikipedia => answer::wikipedia::parse_response(body),
|
||||||
_ => eyre::bail!("engine {self:?} can't parse response"),
|
_ => eyre::bail!("engine {self:?} can't parse response"),
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
pub mod bing;
|
pub mod bing;
|
||||||
pub mod brave;
|
pub mod brave;
|
||||||
pub mod google;
|
pub mod google;
|
||||||
|
pub mod marginalia;
|
||||||
|
38
src/engines/search/marginalia.rs
Normal file
38
src/engines/search/marginalia.rs
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
use reqwest::Url;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
engines::{EngineResponse, CLIENT},
|
||||||
|
parse::{parse_html_response_with_opts, ParseOpts},
|
||||||
|
};
|
||||||
|
|
||||||
|
pub fn request(query: &str) -> reqwest::RequestBuilder {
|
||||||
|
CLIENT
|
||||||
|
.get(
|
||||||
|
Url::parse_with_params(
|
||||||
|
"https://search.marginalia.nu/search",
|
||||||
|
&[
|
||||||
|
("query", query),
|
||||||
|
("profile", "default"),
|
||||||
|
("js", "default"),
|
||||||
|
("adtech", "default"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
.unwrap(),
|
||||||
|
)
|
||||||
|
.header(
|
||||||
|
"User-Agent",
|
||||||
|
"Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
|
||||||
|
)
|
||||||
|
.header("Accept-Language", "en-US,en;q=0.5")
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
|
||||||
|
parse_html_response_with_opts(
|
||||||
|
body,
|
||||||
|
ParseOpts::new()
|
||||||
|
.result("section.search-result")
|
||||||
|
.title("h2")
|
||||||
|
.href("a[href]")
|
||||||
|
.description("p.description"),
|
||||||
|
)
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user