Initial commit
This commit is contained in:
commit
da972bd45e
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
/target
|
1950
Cargo.lock
generated
Normal file
1950
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
24
Cargo.toml
Normal file
24
Cargo.toml
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
[package]
name = "metasearch2"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
anyhow = "1.0.75"
async-stream = "0.3.5"
axum = { version = "0.7.2", features = ["http2"] }
base64 = "0.21.5"
bytes = "1.5.0"
eyre = "0.6.11"
futures = "0.3.29"
html-escape = "0.2.13"
# rustls-tls (with default features off) avoids a native OpenSSL dependency
reqwest = { version = "0.11.23", default-features = false, features = [
    "rustls-tls",
] }
scraper = "0.18.1"
tokio = { version = "1.35.0", features = ["full"] }
tokio-stream = "0.1.14"
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
url = "2.5.0"
|
238
src/engines/mod.rs
Normal file
238
src/engines/mod.rs
Normal file
@ -0,0 +1,238 @@
|
|||||||
|
use std::{
|
||||||
|
collections::{BTreeSet, HashMap},
|
||||||
|
fmt,
|
||||||
|
sync::LazyLock,
|
||||||
|
time::Instant,
|
||||||
|
};
|
||||||
|
|
||||||
|
use futures::future::join_all;
|
||||||
|
use tokio::sync::mpsc;
|
||||||
|
|
||||||
|
use self::search::{bing, brave, google};
|
||||||
|
|
||||||
|
pub mod search;
|
||||||
|
|
||||||
|
/// Identifies one upstream search engine that this metasearch can query.
///
/// `Hash` and `Ord` are derived so engines can be used as keys in
/// `HashMap`/`BTreeSet` collections (see `SearchResult::engines`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum Engine {
    Google,
    Bing,
    Brave,
}
|
||||||
|
|
||||||
|
impl Engine {
    /// Every engine that is queried for a search.
    pub fn all() -> &'static [Engine] {
        &[Engine::Google, Engine::Bing, Engine::Brave]
    }

    /// Lowercase identifier used in log output and in the rendered HTML.
    pub fn name(&self) -> &'static str {
        match self {
            Engine::Google => "google",
            Engine::Bing => "bing",
            Engine::Brave => "brave",
        }
    }

    /// Builds (but does not send) the HTTP request for `query` against this
    /// engine, delegating to the engine-specific module.
    pub fn request(&self, client: &reqwest::Client, query: &str) -> reqwest::RequestBuilder {
        match self {
            Engine::Google => google::request(client, query),
            Engine::Bing => bing::request(client, query),
            Engine::Brave => brave::request(client, query),
        }
    }

    /// Parses the raw HTML response `body` into structured search results.
    pub fn parse_response(&self, body: &str) -> eyre::Result<EngineResponse> {
        match self {
            Engine::Google => google::parse_response(body),
            Engine::Bing => bing::parse_response(body),
            Engine::Brave => brave::parse_response(body),
        }
    }

    /// Relative ranking weight; scores from higher-weighted engines count
    /// for more when responses are merged (see `merge_engine_responses`).
    pub fn weight(&self) -> f64 {
        match self {
            Engine::Google => 1.05,
            Engine::Bing => 1.,
            Engine::Brave => 1.25,
        }
    }
}
|
||||||
|
|
||||||
|
/// One result as extracted from a single engine's response page.
#[derive(Debug)]
pub struct EngineSearchResult {
    /// Destination URL (normalized by `crate::normalize::normalize_url`).
    pub url: String,
    pub title: String,
    pub description: String,
}

/// Everything parsed out of one engine's response.
pub struct EngineResponse {
    pub search_results: Vec<EngineSearchResult>,
}
|
||||||
|
|
||||||
|
/// The stage an engine's request has reached, used for live progress output
/// while a search is running.
#[derive(Debug)]
pub enum ProgressUpdateKind {
    Requesting,
    Downloading,
    Parsing,
    Done,
}

/// A timestamped progress event for one engine.
#[derive(Debug)]
pub struct ProgressUpdate {
    pub kind: ProgressUpdateKind,
    pub engine: Engine,
    /// Seconds elapsed since the overall search started.
    pub time: f64,
}
|
||||||
|
|
||||||
|
impl ProgressUpdate {
    /// Creates an update for `engine` at stage `kind`, stamped with the time
    /// elapsed since `start_time` (the start of the whole search).
    pub fn new(kind: ProgressUpdateKind, engine: Engine, start_time: Instant) -> Self {
        Self {
            kind,
            engine,
            time: start_time.elapsed().as_secs_f64(),
        }
    }
}
|
||||||
|
|
||||||
|
impl fmt::Display for ProgressUpdate {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
let message = match self.kind {
|
||||||
|
ProgressUpdateKind::Requesting => "Requesting",
|
||||||
|
ProgressUpdateKind::Downloading => "Downloading",
|
||||||
|
ProgressUpdateKind::Parsing => "Parsing",
|
||||||
|
ProgressUpdateKind::Done => "Done",
|
||||||
|
};
|
||||||
|
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"{time:.3}s {message} {engine}",
|
||||||
|
time = self.time,
|
||||||
|
message = message,
|
||||||
|
engine = self.engine.name()
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn search_with_client_and_engines(
|
||||||
|
client: &reqwest::Client,
|
||||||
|
engines: &[Engine],
|
||||||
|
query: &str,
|
||||||
|
progress_tx: mpsc::UnboundedSender<ProgressUpdate>,
|
||||||
|
) -> eyre::Result<Response> {
|
||||||
|
let start_time = Instant::now();
|
||||||
|
|
||||||
|
let mut requests = Vec::new();
|
||||||
|
for engine in engines {
|
||||||
|
requests.push(async {
|
||||||
|
let engine = *engine;
|
||||||
|
progress_tx.send(ProgressUpdate::new(
|
||||||
|
ProgressUpdateKind::Requesting,
|
||||||
|
engine,
|
||||||
|
start_time,
|
||||||
|
))?;
|
||||||
|
|
||||||
|
let res = engine.request(client, query).send().await?;
|
||||||
|
|
||||||
|
progress_tx.send(ProgressUpdate::new(
|
||||||
|
ProgressUpdateKind::Downloading,
|
||||||
|
engine,
|
||||||
|
start_time,
|
||||||
|
))?;
|
||||||
|
|
||||||
|
let body = res.text().await?;
|
||||||
|
|
||||||
|
progress_tx.send(ProgressUpdate::new(
|
||||||
|
ProgressUpdateKind::Parsing,
|
||||||
|
engine,
|
||||||
|
start_time,
|
||||||
|
))?;
|
||||||
|
|
||||||
|
let response = engine.parse_response(&body)?;
|
||||||
|
|
||||||
|
progress_tx.send(ProgressUpdate::new(
|
||||||
|
ProgressUpdateKind::Done,
|
||||||
|
engine,
|
||||||
|
start_time,
|
||||||
|
))?;
|
||||||
|
|
||||||
|
Ok((engine, response))
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut response_futures = Vec::new();
|
||||||
|
for request in requests {
|
||||||
|
response_futures.push(request);
|
||||||
|
}
|
||||||
|
|
||||||
|
let responses_result: eyre::Result<HashMap<_, _>> =
|
||||||
|
join_all(response_futures).await.into_iter().collect();
|
||||||
|
let responses = responses_result?;
|
||||||
|
|
||||||
|
Ok(merge_engine_responses(responses))
|
||||||
|
}
|
||||||
|
|
||||||
|
static CLIENT: LazyLock<reqwest::Client> = LazyLock::new(|| reqwest::Client::new());
|
||||||
|
|
||||||
|
pub async fn search(
|
||||||
|
query: &str,
|
||||||
|
progress_tx: mpsc::UnboundedSender<ProgressUpdate>,
|
||||||
|
) -> eyre::Result<Response> {
|
||||||
|
let engines = Engine::all();
|
||||||
|
search_with_client_and_engines(&CLIENT, &engines, query, progress_tx).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The merged, ranked output of a whole search across all engines.
#[derive(Debug)]
pub struct Response {
    pub search_results: Vec<SearchResult>,
}
/// One merged result.
#[derive(Debug)]
pub struct SearchResult {
    pub url: String,
    pub title: String,
    pub description: String,
    /// Every engine that returned this URL.
    pub engines: BTreeSet<Engine>,
    /// Weighted reciprocal-rank sum; results are ordered by this, descending.
    pub score: f64,
}
|
||||||
|
|
||||||
|
fn merge_engine_responses(responses: HashMap<Engine, EngineResponse>) -> Response {
|
||||||
|
let mut search_results: Vec<SearchResult> = Vec::new();
|
||||||
|
for (engine, response) in responses {
|
||||||
|
for (result_index, search_result) in response.search_results.into_iter().enumerate() {
|
||||||
|
// position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a score of 0.33, etc.
|
||||||
|
let base_result_score = 1. / (result_index + 1) as f64;
|
||||||
|
let result_score = base_result_score * engine.weight();
|
||||||
|
|
||||||
|
if let Some(existing_result) = search_results
|
||||||
|
.iter_mut()
|
||||||
|
.find(|r| r.url == search_result.url)
|
||||||
|
{
|
||||||
|
// if the weight of this engine is higher than every other one then replace the title and description
|
||||||
|
if engine.weight()
|
||||||
|
> existing_result
|
||||||
|
.engines
|
||||||
|
.iter()
|
||||||
|
.map(Engine::weight)
|
||||||
|
.max_by(|a, b| a.partial_cmp(b).unwrap())
|
||||||
|
.unwrap_or(0.)
|
||||||
|
{
|
||||||
|
existing_result.title = search_result.title;
|
||||||
|
existing_result.description = search_result.description;
|
||||||
|
}
|
||||||
|
|
||||||
|
existing_result.engines.insert(engine);
|
||||||
|
existing_result.score += result_score;
|
||||||
|
} else {
|
||||||
|
search_results.push(SearchResult {
|
||||||
|
url: search_result.url,
|
||||||
|
title: search_result.title,
|
||||||
|
description: search_result.description,
|
||||||
|
engines: [engine].iter().cloned().collect(),
|
||||||
|
score: result_score,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
search_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
|
||||||
|
|
||||||
|
Response { search_results }
|
||||||
|
}
|
3
src/engines/search.rs
Normal file
3
src/engines/search.rs
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
pub mod bing;
|
||||||
|
pub mod brave;
|
||||||
|
pub mod google;
|
62
src/engines/search/bing.rs
Normal file
62
src/engines/search/bing.rs
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
use base64::Engine;
|
||||||
|
use reqwest::Url;
|
||||||
|
use scraper::{ElementRef, Selector};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
engines::EngineResponse,
|
||||||
|
parse::{parse_html_response_with_opts, ParseOpts, QueryMethod},
|
||||||
|
};
|
||||||
|
|
||||||
|
pub fn request(client: &reqwest::Client, query: &str) -> reqwest::RequestBuilder {
|
||||||
|
client
|
||||||
|
.get(
|
||||||
|
Url::parse_with_params(
|
||||||
|
"https://www.bing.com/search",
|
||||||
|
// filters=rcrse:"1" makes it not try to autocorrect
|
||||||
|
&[("q", query), ("filters", "rcrse:\"1\"")],
|
||||||
|
)
|
||||||
|
.unwrap(),
|
||||||
|
)
|
||||||
|
.header(
|
||||||
|
"User-Agent",
|
||||||
|
"Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
|
||||||
|
)
|
||||||
|
.header("Accept-Language", "en-US,en;q=0.5")
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
|
||||||
|
parse_html_response_with_opts(
|
||||||
|
body,
|
||||||
|
ParseOpts {
|
||||||
|
result_item: "#b_results > li.b_algo",
|
||||||
|
title: ".b_algo h2 > a",
|
||||||
|
href: QueryMethod::Manual(Box::new(|el: &ElementRef| {
|
||||||
|
let url = el
|
||||||
|
.select(&Selector::parse("a").unwrap())
|
||||||
|
.next()
|
||||||
|
.and_then(|n| n.value().attr("href"))
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
// clean up bing's tracking urls
|
||||||
|
if url.starts_with("https://www.bing.com/ck/a?") {
|
||||||
|
// get the u param
|
||||||
|
let url = Url::parse(url)?;
|
||||||
|
let u = url
|
||||||
|
.query_pairs()
|
||||||
|
.find(|(key, _)| key == "u")
|
||||||
|
.unwrap_or_default()
|
||||||
|
.1;
|
||||||
|
// cut off the "a1" and base64 decode
|
||||||
|
let u = base64::engine::general_purpose::URL_SAFE_NO_PAD
|
||||||
|
.decode(&u[2..])
|
||||||
|
.unwrap_or_default();
|
||||||
|
// now normalize that one instead
|
||||||
|
Ok(String::from_utf8_lossy(&u).to_string())
|
||||||
|
} else {
|
||||||
|
Ok(url.to_string())
|
||||||
|
}
|
||||||
|
})),
|
||||||
|
description: ".b_caption > p, p.b_algoSlug",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
28
src/engines/search/brave.rs
Normal file
28
src/engines/search/brave.rs
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
use reqwest::Url;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
engines::EngineResponse,
|
||||||
|
parse::{parse_html_response_with_opts, ParseOpts},
|
||||||
|
};
|
||||||
|
|
||||||
|
pub fn request(client: &reqwest::Client, query: &str) -> reqwest::RequestBuilder {
|
||||||
|
client
|
||||||
|
.get(Url::parse_with_params("https://search.brave.com/search", &[("q", query)]).unwrap())
|
||||||
|
.header(
|
||||||
|
"User-Agent",
|
||||||
|
"Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
|
||||||
|
)
|
||||||
|
.header("Accept-Language", "en-US,en;q=0.5")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a Brave Search results page using plain CSS selectors.
pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
    parse_html_response_with_opts(
        body,
        ParseOpts {
            // only snippets that carry a data-pos attribute, excluding
            // standalone cards
            result_item: "#results > .snippet[data-pos]:not(.standalone)",
            // NOTE(review): the title is taken from the `.url` element —
            // verify against Brave's markup that this isn't a mix-up with a
            // title selector
            title: ".url",
            href: "a",
            description: ".snippet-content",
        },
    )
}
|
35
src/engines/search/google.rs
Normal file
35
src/engines/search/google.rs
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
use reqwest::Url;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
engines::EngineResponse,
|
||||||
|
parse::{parse_html_response_with_opts, ParseOpts},
|
||||||
|
};
|
||||||
|
|
||||||
|
pub fn request(client: &reqwest::Client, query: &str) -> reqwest::RequestBuilder {
|
||||||
|
client
|
||||||
|
.get(
|
||||||
|
Url::parse_with_params(
|
||||||
|
"https://www.google.com/search",
|
||||||
|
// nfpr makes it not try to autocorrect
|
||||||
|
&[("q", query), ("nfpr", "1")],
|
||||||
|
)
|
||||||
|
.unwrap(),
|
||||||
|
)
|
||||||
|
.header(
|
||||||
|
"User-Agent",
|
||||||
|
"Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
|
||||||
|
)
|
||||||
|
.header("Accept-Language", "en-US,en;q=0.5")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a Google results page using plain CSS selectors.
pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
    parse_html_response_with_opts(
        body,
        ParseOpts {
            result_item: "div.g, div.xpd",
            title: "h3",
            href: "a",
            // NOTE(review): the inline-style selector is brittle — it
            // depends on Google's current line-clamp styling
            description: "div[data-sncf], div[style='-webkit-line-clamp:2']",
        },
    )
}
|
14
src/main.rs
Normal file
14
src/main.rs
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
#![feature(lazy_cell)]
|
||||||
|
|
||||||
|
pub mod engines;
|
||||||
|
pub mod normalize;
|
||||||
|
pub mod parse;
|
||||||
|
pub mod web;
|
||||||
|
|
||||||
|
#[tokio::main]
async fn main() {
    // initialize tracing (reads RUST_LOG via the env-filter feature)
    tracing_subscriber::fmt::init();

    // start the web server; blocks until the server exits
    web::run().await;
}
|
36
src/normalize.rs
Normal file
36
src/normalize.rs
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
use url::Url;
|
||||||
|
|
||||||
|
pub fn normalize_url(url: &str) -> eyre::Result<String> {
|
||||||
|
let mut url = Url::parse(url)?;
|
||||||
|
|
||||||
|
// make sure the scheme is https
|
||||||
|
if url.scheme() == "http" {
|
||||||
|
url.set_scheme("https").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
// remove trailing slash
|
||||||
|
let path = url.path().to_string();
|
||||||
|
if let Some(path) = path.strip_suffix('/') {
|
||||||
|
url.set_path(path);
|
||||||
|
}
|
||||||
|
|
||||||
|
// remove ref_src tracking param
|
||||||
|
let query_pairs = url.query_pairs().into_owned();
|
||||||
|
let mut new_query_pairs = Vec::new();
|
||||||
|
for (key, value) in query_pairs {
|
||||||
|
if key != "ref_src" {
|
||||||
|
new_query_pairs.push((key, value));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if new_query_pairs.is_empty() {
|
||||||
|
url.set_query(None);
|
||||||
|
} else {
|
||||||
|
url.set_query(Some(
|
||||||
|
&url::form_urlencoded::Serializer::new(String::new())
|
||||||
|
.extend_pairs(new_query_pairs)
|
||||||
|
.finish(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
return Ok(url.to_string());
|
||||||
|
}
|
102
src/parse.rs
Normal file
102
src/parse.rs
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
//! Helper functions for parsing search engine responses.
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
engines::{EngineResponse, EngineSearchResult},
|
||||||
|
normalize::normalize_url,
|
||||||
|
};
|
||||||
|
|
||||||
|
use scraper::{Html, Selector};
|
||||||
|
|
||||||
|
/// Instructions for pulling search results out of an engine's HTML page.
///
/// The three field types are anything convertible into a [`QueryMethod`], so
/// engines can pass plain CSS selector strings or custom closures.
pub struct ParseOpts<A, B, C>
where
    A: Into<QueryMethod>,
    B: Into<QueryMethod>,
    C: Into<QueryMethod>,
{
    /// CSS selector matching one result container element.
    pub result_item: &'static str,
    /// How to extract the title from a result element.
    pub title: A,
    /// How to extract the destination URL from a result element.
    pub href: B,
    /// How to extract the description from a result element.
    pub description: C,
}
|
||||||
|
|
||||||
|
/// How to extract a single field (title/href/description) from a matched
/// result element.
pub enum QueryMethod {
    /// Select with a CSS selector; the matched node's text is used (or its
    /// `href` attribute, for the URL field).
    CssSelector(&'static str),
    /// Arbitrary extraction logic, for engines whose markup needs custom
    /// handling (e.g. decoding tracking redirects).
    Manual(Box<dyn Fn(&scraper::ElementRef) -> eyre::Result<String>>),
}

/// Lets a bare CSS selector string be used wherever a `QueryMethod` is
/// expected.
impl From<&'static str> for QueryMethod {
    fn from(s: &'static str) -> Self {
        QueryMethod::CssSelector(s)
    }
}
|
||||||
|
|
||||||
|
pub(super) fn parse_html_response_with_opts<A, B, C>(
|
||||||
|
body: &str,
|
||||||
|
opts: ParseOpts<A, B, C>,
|
||||||
|
) -> eyre::Result<EngineResponse>
|
||||||
|
where
|
||||||
|
A: Into<QueryMethod>,
|
||||||
|
B: Into<QueryMethod>,
|
||||||
|
C: Into<QueryMethod>,
|
||||||
|
{
|
||||||
|
let dom = Html::parse_document(body);
|
||||||
|
|
||||||
|
let mut search_results = Vec::new();
|
||||||
|
|
||||||
|
let ParseOpts {
|
||||||
|
result_item: result_item_query,
|
||||||
|
title: title_query_method,
|
||||||
|
href: href_query_method,
|
||||||
|
description: description_query_method,
|
||||||
|
} = opts;
|
||||||
|
let title_query_method = title_query_method.into();
|
||||||
|
let href_query_method = href_query_method.into();
|
||||||
|
let description_query_method = description_query_method.into();
|
||||||
|
|
||||||
|
let result_item_query = Selector::parse(result_item_query).unwrap();
|
||||||
|
|
||||||
|
let result_items = dom.select(&result_item_query);
|
||||||
|
|
||||||
|
for result_item in result_items {
|
||||||
|
let title = match title_query_method {
|
||||||
|
QueryMethod::CssSelector(s) => result_item
|
||||||
|
.select(&Selector::parse(s).unwrap())
|
||||||
|
.next()
|
||||||
|
.map(|n| n.text().collect::<String>())
|
||||||
|
.unwrap_or_default(),
|
||||||
|
QueryMethod::Manual(ref f) => f(&result_item)?,
|
||||||
|
};
|
||||||
|
|
||||||
|
let url = match href_query_method {
|
||||||
|
QueryMethod::CssSelector(s) => result_item
|
||||||
|
.select(&Selector::parse(s).unwrap())
|
||||||
|
.next()
|
||||||
|
.map(|n| {
|
||||||
|
n.value()
|
||||||
|
.attr("href")
|
||||||
|
.map(str::to_string)
|
||||||
|
.unwrap_or_else(|| n.text().collect::<String>())
|
||||||
|
})
|
||||||
|
.unwrap_or_default(),
|
||||||
|
QueryMethod::Manual(ref f) => f(&result_item)?,
|
||||||
|
};
|
||||||
|
let url = normalize_url(&url)?;
|
||||||
|
|
||||||
|
let description = match description_query_method {
|
||||||
|
QueryMethod::CssSelector(s) => result_item
|
||||||
|
.select(&Selector::parse(s).unwrap())
|
||||||
|
.next()
|
||||||
|
.map(|n| n.text().collect::<String>())
|
||||||
|
.unwrap_or_default(),
|
||||||
|
QueryMethod::Manual(ref f) => f(&result_item)?,
|
||||||
|
};
|
||||||
|
|
||||||
|
search_results.push(EngineSearchResult {
|
||||||
|
url,
|
||||||
|
title,
|
||||||
|
description,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(EngineResponse { search_results })
|
||||||
|
}
|
18
src/web/index.html
Normal file
18
src/web/index.html
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>metasearch</title>
    <link rel="stylesheet" href="/style.css">
</head>
<body>
    <main>
        <h1>metasearch</h1>
        <!-- submits to the /search route as ?q=... -->
        <form action="/search" method="get">
            <input type="text" name="q" placeholder="Search" autofocus>
            <input type="submit" value="Search">
        </form>
    </main>
</body>
</html>
|
8
src/web/index.rs
Normal file
8
src/web/index.rs
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
use axum::{http::header, response::IntoResponse};
|
||||||
|
|
||||||
|
pub async fn route() -> impl IntoResponse {
|
||||||
|
(
|
||||||
|
[(header::CONTENT_TYPE, "text/html; charset=utf-8")],
|
||||||
|
include_str!("index.html"),
|
||||||
|
)
|
||||||
|
}
|
15
src/web/mod.rs
Normal file
15
src/web/mod.rs
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
pub mod index;
|
||||||
|
pub mod search;
|
||||||
|
pub mod style_css;
|
||||||
|
|
||||||
|
use axum::{routing::get, Router};
|
||||||
|
|
||||||
|
pub async fn run() {
|
||||||
|
let app = Router::new()
|
||||||
|
.route("/", get(index::route))
|
||||||
|
.route("/style.css", get(style_css::route))
|
||||||
|
.route("/search", get(search::route));
|
||||||
|
|
||||||
|
let listener = tokio::net::TcpListener::bind("0.0.0.0:3000").await.unwrap();
|
||||||
|
axum::serve(listener, app).await.unwrap();
|
||||||
|
}
|
143
src/web/search.rs
Normal file
143
src/web/search.rs
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use async_stream::stream;
|
||||||
|
use axum::{
|
||||||
|
body::Body,
|
||||||
|
extract::Query,
|
||||||
|
http::{header, StatusCode},
|
||||||
|
response::IntoResponse,
|
||||||
|
};
|
||||||
|
use bytes::Bytes;
|
||||||
|
use html_escape::{encode_text, encode_unquoted_attribute};
|
||||||
|
|
||||||
|
use crate::engines;
|
||||||
|
|
||||||
|
/// Renders the top of the results page: head, search form pre-filled with
/// `query`, and the opening tag of the progress-updates container (closed
/// later by `route` once results arrive).
///
/// `query` is text-escaped for the `<title>` and attribute-escaped for the
/// input's `value` attribute.
fn render_beginning_of_html(query: &str) -> String {
    format!(
        r#"<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{} - metasearch</title>
    <link rel="stylesheet" href="/style.css">
</head>
<body>
    <main>
    <form action="/search" method="get" class="search-form">
        <input type="text" name="q" placeholder="Search" value="{}">
        <input type="submit" value="Search">
    </form>
    <div class="progress-updates">
"#,
        encode_text(query),
        encode_unquoted_attribute(query)
    )
}
|
||||||
|
|
||||||
|
/// Renders the closing tags that `render_beginning_of_html` opened.
fn render_end_of_html() -> String {
    String::from(r#"</main></body></html>"#)
}
|
||||||
|
|
||||||
|
fn render_search_result(result: &engines::SearchResult) -> String {
|
||||||
|
let engines_html = result
|
||||||
|
.engines
|
||||||
|
.iter()
|
||||||
|
.map(|engine| {
|
||||||
|
format!(
|
||||||
|
r#"<span class="search-result-engines-item">{}</span>"#,
|
||||||
|
encode_text(&engine.name())
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("");
|
||||||
|
|
||||||
|
format!(
|
||||||
|
r#"<div class="search-result">
|
||||||
|
<a class="search-result-anchor" href="{url_attr}">
|
||||||
|
<span class="search-result-url" href="{url_attr}">{url}</span>
|
||||||
|
<h3 class="search-result-title">{title}</h3>
|
||||||
|
</a>
|
||||||
|
<p class="search-result-description">{desc}</p>
|
||||||
|
<div class="search-result-engines">{engines_html}</div>
|
||||||
|
</div>
|
||||||
|
"#,
|
||||||
|
url_attr = encode_unquoted_attribute(&result.url),
|
||||||
|
url = encode_text(&result.url),
|
||||||
|
title = encode_text(&result.title),
|
||||||
|
desc = encode_text(&result.description)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn route(Query(params): Query<HashMap<String, String>>) -> impl IntoResponse {
|
||||||
|
let query = params
|
||||||
|
.get("q")
|
||||||
|
.cloned()
|
||||||
|
.unwrap_or_default()
|
||||||
|
.trim()
|
||||||
|
.replace('\n', " ");
|
||||||
|
if query.is_empty() {
|
||||||
|
// redirect to index
|
||||||
|
return (
|
||||||
|
StatusCode::FOUND,
|
||||||
|
[
|
||||||
|
(header::LOCATION, "/"),
|
||||||
|
(header::CONTENT_TYPE, "text/html; charset=utf-8"),
|
||||||
|
],
|
||||||
|
Body::from("<a href=\"/\">No query provided, click here to go back to index</a>"),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
let s = stream! {
|
||||||
|
type R = Result<Bytes, eyre::Error>;
|
||||||
|
|
||||||
|
yield R::Ok(Bytes::from(render_beginning_of_html(&query)));
|
||||||
|
|
||||||
|
let (progress_tx, mut progress_rx) = tokio::sync::mpsc::unbounded_channel();
|
||||||
|
|
||||||
|
let search_future = tokio::spawn(async move { engines::search(&query, progress_tx).await });
|
||||||
|
|
||||||
|
while let Some(progress_update) = progress_rx.recv().await {
|
||||||
|
let progress_html = format!(
|
||||||
|
r#"<p class="progress-update">{}</p>"#,
|
||||||
|
encode_text(&progress_update.to_string())
|
||||||
|
);
|
||||||
|
yield R::Ok(Bytes::from(progress_html));
|
||||||
|
}
|
||||||
|
|
||||||
|
let results = match search_future.await? {
|
||||||
|
Ok(results) => results,
|
||||||
|
Err(e) => {
|
||||||
|
let error_html = format!(
|
||||||
|
r#"<h1>Error: {}</p>"#,
|
||||||
|
encode_text(&e.to_string())
|
||||||
|
);
|
||||||
|
yield R::Ok(Bytes::from(error_html));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut second_half = String::new();
|
||||||
|
|
||||||
|
second_half.push_str("</div>"); // close progress-updates
|
||||||
|
second_half.push_str("<style>.progress-updates{display:none}</style>");
|
||||||
|
for result in results.search_results {
|
||||||
|
second_half.push_str(&render_search_result(&result));
|
||||||
|
}
|
||||||
|
second_half.push_str(&render_end_of_html());
|
||||||
|
|
||||||
|
yield Ok(Bytes::from(second_half));
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
let stream = Body::from_stream(s);
|
||||||
|
|
||||||
|
(
|
||||||
|
StatusCode::OK,
|
||||||
|
[
|
||||||
|
(header::CONTENT_TYPE, "text/html; charset=utf-8"),
|
||||||
|
(header::TRANSFER_ENCODING, "chunked"),
|
||||||
|
],
|
||||||
|
stream,
|
||||||
|
)
|
||||||
|
}
|
70
src/web/style.css
Normal file
70
src/web/style.css
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
/* dark, monospace base theme */
body {
    font-family: monospace;
    background-color: #0b0e14;
    color: #bfbdb6;
    margin: 0;
    line-height: 1.2;
}
main {
    max-width: 40rem;
    padding: 1rem 0.5rem;
    margin: 0 auto;
    background-color: #0d1017;
}
input {
    font-family: monospace;
    background-color: #0d1017;
    color: #bfbdb6;
    border: 1px solid #234;
    font-size: inherit;
    padding: 0.25rem;
}
input[type="submit"] {
    cursor: pointer;
}

.search-form {
    margin-bottom: 1rem;
}

/* one block per merged result */
.search-result {
    margin-bottom: 1rem;
    padding-top: 1rem;
    border-top: 1px solid #234;
}

.search-result-anchor {
    color: inherit;
    text-decoration: none;
}

.search-result-url {
    margin: 0;
    font-size: 0.8rem;
    color: #998;
}
.search-result-title {
    margin: 0;
    font-size: 1.2em;
    color: #29e;
}
.search-result-description {
    margin: 0;
    font-size: 0.8rem;
    color: #bba;
}
/* engine badges, right-aligned next to each result */
.search-result-engines {
    opacity: 0.5;
    float: right;
    display: flex;
    gap: 0.5em;
}

/* live progress box; hidden via inline <style> once results stream in */
.progress-updates {
    margin-bottom: 1rem;
    border: 1px solid #234;
    padding: 0.5rem;
}
.progress-update {
    margin: 0;
}
|
8
src/web/style_css.rs
Normal file
8
src/web/style_css.rs
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
use axum::{http::header, response::IntoResponse};
|
||||||
|
|
||||||
|
pub async fn route() -> impl IntoResponse {
|
||||||
|
(
|
||||||
|
[(header::CONTENT_TYPE, "text/css; charset=utf-8")],
|
||||||
|
include_str!("style.css"),
|
||||||
|
)
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user