From 359b8ae2d62f2798295c411298bc072515e57c37 Mon Sep 17 00:00:00 2001 From: mat Date: Wed, 20 Dec 2023 03:28:38 -0600 Subject: [PATCH] add basic autocomplete --- Cargo.lock | 1 + Cargo.toml | 1 + README | 1 + src/engines/mod.rs | 115 +++++++++++++++++++++++++++-------- src/engines/search/bing.rs | 2 +- src/engines/search/brave.rs | 2 +- src/engines/search/google.rs | 34 +++++++++++ src/normalize.rs | 3 + src/web/assets/index.html | 3 +- src/web/assets/script.js | 23 +++++++ src/web/assets/style.css | 6 +- src/web/autocomplete.rs | 24 ++++++++ src/web/mod.rs | 13 +++- src/web/search.rs | 26 ++++++-- 14 files changed, 220 insertions(+), 34 deletions(-) create mode 100644 README create mode 100644 src/web/assets/script.js create mode 100644 src/web/autocomplete.rs diff --git a/Cargo.lock b/Cargo.lock index 645ddc1..e3cb8c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -796,6 +796,7 @@ dependencies = [ "html-escape", "reqwest", "scraper", + "serde_json", "tokio", "tokio-stream", "tracing-subscriber", diff --git a/Cargo.toml b/Cargo.toml index edaf47c..087a8dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ reqwest = { version = "0.11.23", default-features = false, features = [ "rustls-tls", ] } scraper = "0.18.1" +serde_json = "1.0.108" tokio = { version = "1.35.0", features = ["full"] } tokio-stream = "0.1.14" tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } diff --git a/README b/README new file mode 100644 index 0000000..8b3fe26 --- /dev/null +++ b/README @@ -0,0 +1 @@ +a cute metasearch engine diff --git a/src/engines/mod.rs b/src/engines/mod.rs index 6b0bc93..1f7f4f5 100644 --- a/src/engines/mod.rs +++ b/src/engines/mod.rs @@ -1,6 +1,5 @@ use std::{ collections::{BTreeSet, HashMap}, - fmt, sync::LazyLock, time::Instant, }; @@ -24,7 +23,7 @@ impl Engine { &[Engine::Google, Engine::Bing, Engine::Brave] } - pub fn name(&self) -> &'static str { + pub fn id(&self) -> &'static str { match self { Engine::Google => "google", Engine::Bing => "bing", @@ -32,6 +31,14 @@ impl Engine { } } + pub fn weight(&self) -> f64 { + match self { + Engine::Google => 1.05, + Engine::Bing => 1., + Engine::Brave => 1.25, + } + } + pub fn request(&self, client: &reqwest::Client, query: &str) -> reqwest::RequestBuilder { match self { Engine::Google => google::request(client, query), @@ -48,11 +55,21 @@ impl Engine { } } - pub fn weight(&self) -> f64 { + pub fn request_autocomplete( + &self, + client: &reqwest::Client, + query: &str, + ) -> Option { match self { - Engine::Google => 1.05, - Engine::Bing => 1., - Engine::Brave => 1.25, + Engine::Google => Some(google::request_autocomplete(client, query)), + _ => None, + } + } + + pub fn parse_autocomplete_response(&self, body: &str) -> eyre::Result> { + match self { + Engine::Google => google::parse_autocomplete_response(body), + _ => Ok(Vec::new()), } } } @@ -102,25 +119,6 @@ impl ProgressUpdate { } } -impl fmt::Display for ProgressUpdate { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let message = match self.kind { - ProgressUpdateKind::Requesting => "requesting", - ProgressUpdateKind::Downloading => "downloading", - ProgressUpdateKind::Parsing => "parsing", - ProgressUpdateKind::Done => "done", - }; - - write!( - f, - r#"{time:>4}ms {engine} {message}"#, - time = self.time, - message = message, - engine = self.engine.name() - ) - } -} - pub async fn search_with_client_and_engines( client: &reqwest::Client, engines: &[Engine], @@ -179,6 +177,35 @@ pub async fn search_with_client_and_engines( Ok(merge_engine_responses(responses)) } +pub async fn autocomplete_with_client_and_engines( + client: &reqwest::Client, + engines: &[Engine], + query: &str, +) -> eyre::Result> { + let mut requests = Vec::new(); + for engine in engines { + if let Some(request) = engine.request_autocomplete(client, query) { + requests.push(async { + let res = request.send().await?; + let body = res.text().await?; + let response = engine.parse_autocomplete_response(&body)?; + Ok((*engine, response)) + }); + } + } + + let mut autocomplete_futures = Vec::new(); + for request in requests { + autocomplete_futures.push(request); + } + + let autocomplete_results_result: eyre::Result> = + join_all(autocomplete_futures).await.into_iter().collect(); + let autocomplete_results = autocomplete_results_result?; + + Ok(merge_autocomplete_responses(autocomplete_results)) +} + static CLIENT: LazyLock = LazyLock::new(|| reqwest::Client::new()); pub async fn search( @@ -189,6 +216,11 @@ pub async fn search( search_with_client_and_engines(&CLIENT, &engines, query, progress_tx).await } +pub async fn autocomplete(query: &str) -> eyre::Result> { + let engines = Engine::all(); + autocomplete_with_client_and_engines(&CLIENT, &engines, query).await +} + #[derive(Debug)] pub struct Response { pub search_results: Vec, @@ -276,3 +308,36 @@ fn merge_engine_responses(responses: HashMap) -> Respons featured_snippet, } } + +pub struct AutocompleteResult { + pub query: String, + pub score: f64, +} + +fn merge_autocomplete_responses(responses: HashMap>) -> Vec { + let mut autocomplete_results: Vec = Vec::new(); + + for (engine, response) in responses { + for (result_index, autocomplete_result) in response.into_iter().enumerate() { + // position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a score of 0.33, etc. + let base_result_score = 1. / (result_index + 1) as f64; + let result_score = base_result_score * engine.weight(); + + if let Some(existing_result) = autocomplete_results + .iter_mut() + .find(|r| r.query == autocomplete_result) + { + existing_result.score += result_score; + } else { + autocomplete_results.push(AutocompleteResult { + query: autocomplete_result, + score: result_score, + }); + } + } + } + + autocomplete_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap()); + + autocomplete_results.into_iter().map(|r| r.query).collect() +} diff --git a/src/engines/search/bing.rs b/src/engines/search/bing.rs index 8815304..6d1184e 100644 --- a/src/engines/search/bing.rs +++ b/src/engines/search/bing.rs @@ -38,7 +38,7 @@ pub fn parse_response(body: &str) -> eyre::Result { .unwrap_or_default(); clean_url(url) }))) - .description(".b_caption > p, p.b_algoSlug"), + .description(".b_caption > p, p.b_algoSlug, .b_caption .ipText"), ) } diff --git a/src/engines/search/brave.rs b/src/engines/search/brave.rs index 6b20629..2774904 100644 --- a/src/engines/search/brave.rs +++ b/src/engines/search/brave.rs @@ -22,6 +22,6 @@ pub fn parse_response(body: &str) -> eyre::Result { .result("#results > .snippet[data-pos]:not(.standalone)") .title(".url") .href("a") - .description(".snippet-content"), + .description(".snippet-content, .video-snippet > .snippet-description"), ) } diff --git a/src/engines/search/google.rs b/src/engines/search/google.rs index d280576..61f502a 100644 --- a/src/engines/search/google.rs +++ b/src/engines/search/google.rs @@ -45,6 +45,40 @@ pub fn parse_response(body: &str) -> eyre::Result { ) } +pub fn request_autocomplete(client: &reqwest::Client, query: &str) -> reqwest::RequestBuilder { + client + .get( + Url::parse_with_params( + "https://suggestqueries.google.com/complete/search", + &[ + ("output", "firefox"), + ("client", "firefox"), + ("hl", "US-en"), + ("q", query), + ], + ) + .unwrap(), + ) + .header( + "User-Agent", + "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0", + ) +} + +pub fn parse_autocomplete_response(body: &str) -> eyre::Result> { + let res = serde_json::from_str::>(body)?; + Ok(res + .into_iter() + .nth(1) + .unwrap_or_default() + .as_array() + .cloned() + .unwrap_or_default() + .into_iter() + .map(|v| v.as_str().unwrap_or_default().to_string()) + .collect()) +} + fn clean_url(url: &str) -> eyre::Result { if url.starts_with("/url?q=") { // get the q param diff --git a/src/normalize.rs b/src/normalize.rs index 30bc1a3..ea25732 100644 --- a/src/normalize.rs +++ b/src/normalize.rs @@ -12,6 +12,9 @@ pub fn normalize_url(url: &str) -> eyre::Result { url.set_scheme("https").unwrap(); } + // remove fragment + url.set_fragment(None); + // remove trailing slash let path = url.path().to_string(); if let Some(path) = path.strip_suffix('/') { diff --git a/src/web/assets/index.html b/src/web/assets/index.html index 26cd793..62868cf 100644 --- a/src/web/assets/index.html +++ b/src/web/assets/index.html @@ -5,12 +5,13 @@ metasearch +

metasearch

- +
diff --git a/src/web/assets/script.js b/src/web/assets/script.js new file mode 100644 index 0000000..ed4aa30 --- /dev/null +++ b/src/web/assets/script.js @@ -0,0 +1,23 @@ +// add a datalist after the search input +const searchInputEl = document.getElementById("search-input"); +const datalistEl = document.createElement("datalist"); +datalistEl.id = "search-input-datalist"; +searchInputEl.setAttribute("list", datalistEl.id); +searchInputEl.insertAdjacentElement("afterend", datalistEl); + +// update the datalist options on input +searchInputEl.addEventListener("input", async (e) => { + const value = e.target.value; + + const res = await fetch(`/autocomplete?q=${value}`).then((res) => res.json()); + const options = res[1]; + + console.log(options); + + datalistEl.innerHTML = ""; + options.forEach((option) => { + const optionEl = document.createElement("option"); + optionEl.value = option; + datalistEl.appendChild(optionEl); + }); +}); diff --git a/src/web/assets/style.css b/src/web/assets/style.css index 7cdc0f2..1ae92e4 100644 --- a/src/web/assets/style.css +++ b/src/web/assets/style.css @@ -46,7 +46,7 @@ h1 { .search-form { margin-bottom: 1rem; } -.search-input { +#search-input { width: 20em; } @@ -111,3 +111,7 @@ h1 { .progress-update-time { opacity: 0.5; } +.progress-update-done { + color: #7fd962; + font-weight: bold; +} diff --git a/src/web/autocomplete.rs b/src/web/autocomplete.rs new file mode 100644 index 0000000..b69d8bf --- /dev/null +++ b/src/web/autocomplete.rs @@ -0,0 +1,24 @@ +use std::collections::HashMap; + +use axum::{extract::Query, http::StatusCode, response::IntoResponse, Json}; + +use crate::engines::{self, ProgressUpdate, ProgressUpdateKind, Response}; + +pub async fn route(Query(params): Query>) -> impl IntoResponse { + let query = params + .get("q") + .cloned() + .unwrap_or_default() + .trim() + .replace('\n', " "); + + let res = match engines::autocomplete(&query).await { + Ok(res) => res, + Err(err) => { + eprintln!("Error: {}", err); + return (StatusCode::INTERNAL_SERVER_ERROR, Json((query, vec![]))); + } + }; + + (StatusCode::OK, Json((query, res))) +} diff --git a/src/web/mod.rs b/src/web/mod.rs index 4211972..a55f3e5 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -1,3 +1,4 @@ +pub mod autocomplete; pub mod search; use axum::{http::header, routing::get, Router}; @@ -24,7 +25,17 @@ pub async fn run() { ) }), ) - .route("/search", get(search::route)); + .route( + "/script.js", + get(|| async { + ( + [(header::CONTENT_TYPE, "text/javascript; charset=utf-8")], + include_str!("assets/script.js"), + ) + }), + ) + .route("/search", get(search::route)) + .route("/autocomplete", get(autocomplete::route)); println!("Listening on {BIND_ADDRESS}"); diff --git a/src/web/search.rs b/src/web/search.rs index ee39408..688b420 100644 --- a/src/web/search.rs +++ b/src/web/search.rs @@ -10,7 +10,7 @@ use axum::{ use bytes::Bytes; use html_escape::{encode_text, encode_unquoted_attribute}; -use crate::engines::{self, Response}; +use crate::engines::{self, ProgressUpdate, ProgressUpdateKind, Response}; fn render_beginning_of_html(query: &str) -> String { format!( @@ -21,11 +21,12 @@ fn render_beginning_of_html(query: &str) -> String { {} - metasearch +
- +
@@ -44,7 +45,7 @@ fn render_engine_list(engines: &[engines::Engine]) -> String { for engine in engines { html.push_str(&format!( r#"{engine}"#, - engine = encode_text(&engine.name()) + engine = encode_text(&engine.id()) )); } format!(r#"
{html}
"#) @@ -99,6 +100,22 @@ fn render_results(response: Response) -> String { html } +fn render_progress_update(progress_update: &ProgressUpdate) -> String { + let message: &str = match progress_update.kind { + ProgressUpdateKind::Requesting => "requesting", + ProgressUpdateKind::Downloading => "downloading", + ProgressUpdateKind::Parsing => "parsing", + ProgressUpdateKind::Done => "done", + }; + + format!( + r#"{time:>4}ms {engine} {message}"#, + time = progress_update.time, + message = message, + engine = progress_update.engine.id() + ) +} + pub async fn route(Query(params): Query>) -> impl IntoResponse { let query = params .get("q") @@ -129,7 +146,8 @@ pub async fn route(Query(params): Query>) -> impl IntoRe while let Some(progress_update) = progress_rx.recv().await { let progress_html = format!( - r#"

{progress_update}

"# + r#"

{}

"#, + render_progress_update(&progress_update) ); yield R::Ok(Bytes::from(progress_html)); }