add basic autocomplete
This commit is contained in:
parent
aeac6f7c5d
commit
359b8ae2d6
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -796,6 +796,7 @@ dependencies = [
|
||||
"html-escape",
|
||||
"reqwest",
|
||||
"scraper",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tracing-subscriber",
|
||||
|
@ -18,6 +18,7 @@ reqwest = { version = "0.11.23", default-features = false, features = [
|
||||
"rustls-tls",
|
||||
] }
|
||||
scraper = "0.18.1"
|
||||
serde_json = "1.0.108"
|
||||
tokio = { version = "1.35.0", features = ["full"] }
|
||||
tokio-stream = "0.1.14"
|
||||
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
|
||||
|
@ -1,6 +1,5 @@
|
||||
use std::{
|
||||
collections::{BTreeSet, HashMap},
|
||||
fmt,
|
||||
sync::LazyLock,
|
||||
time::Instant,
|
||||
};
|
||||
@ -24,7 +23,7 @@ impl Engine {
|
||||
&[Engine::Google, Engine::Bing, Engine::Brave]
|
||||
}
|
||||
|
||||
pub fn name(&self) -> &'static str {
|
||||
pub fn id(&self) -> &'static str {
|
||||
match self {
|
||||
Engine::Google => "google",
|
||||
Engine::Bing => "bing",
|
||||
@ -32,6 +31,14 @@ impl Engine {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn weight(&self) -> f64 {
|
||||
match self {
|
||||
Engine::Google => 1.05,
|
||||
Engine::Bing => 1.,
|
||||
Engine::Brave => 1.25,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn request(&self, client: &reqwest::Client, query: &str) -> reqwest::RequestBuilder {
|
||||
match self {
|
||||
Engine::Google => google::request(client, query),
|
||||
@ -48,11 +55,21 @@ impl Engine {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn weight(&self) -> f64 {
|
||||
pub fn request_autocomplete(
|
||||
&self,
|
||||
client: &reqwest::Client,
|
||||
query: &str,
|
||||
) -> Option<reqwest::RequestBuilder> {
|
||||
match self {
|
||||
Engine::Google => 1.05,
|
||||
Engine::Bing => 1.,
|
||||
Engine::Brave => 1.25,
|
||||
Engine::Google => Some(google::request_autocomplete(client, query)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_autocomplete_response(&self, body: &str) -> eyre::Result<Vec<String>> {
|
||||
match self {
|
||||
Engine::Google => google::parse_autocomplete_response(body),
|
||||
_ => Ok(Vec::new()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -102,25 +119,6 @@ impl ProgressUpdate {
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for ProgressUpdate {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let message = match self.kind {
|
||||
ProgressUpdateKind::Requesting => "requesting",
|
||||
ProgressUpdateKind::Downloading => "downloading",
|
||||
ProgressUpdateKind::Parsing => "parsing",
|
||||
ProgressUpdateKind::Done => "<b>done</b>",
|
||||
};
|
||||
|
||||
write!(
|
||||
f,
|
||||
r#"<span class="progress-update-time">{time:>4}ms</span> {engine} {message}"#,
|
||||
time = self.time,
|
||||
message = message,
|
||||
engine = self.engine.name()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn search_with_client_and_engines(
|
||||
client: &reqwest::Client,
|
||||
engines: &[Engine],
|
||||
@ -179,6 +177,35 @@ pub async fn search_with_client_and_engines(
|
||||
Ok(merge_engine_responses(responses))
|
||||
}
|
||||
|
||||
pub async fn autocomplete_with_client_and_engines(
|
||||
client: &reqwest::Client,
|
||||
engines: &[Engine],
|
||||
query: &str,
|
||||
) -> eyre::Result<Vec<String>> {
|
||||
let mut requests = Vec::new();
|
||||
for engine in engines {
|
||||
if let Some(request) = engine.request_autocomplete(client, query) {
|
||||
requests.push(async {
|
||||
let res = request.send().await?;
|
||||
let body = res.text().await?;
|
||||
let response = engine.parse_autocomplete_response(&body)?;
|
||||
Ok((*engine, response))
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let mut autocomplete_futures = Vec::new();
|
||||
for request in requests {
|
||||
autocomplete_futures.push(request);
|
||||
}
|
||||
|
||||
let autocomplete_results_result: eyre::Result<HashMap<_, _>> =
|
||||
join_all(autocomplete_futures).await.into_iter().collect();
|
||||
let autocomplete_results = autocomplete_results_result?;
|
||||
|
||||
Ok(merge_autocomplete_responses(autocomplete_results))
|
||||
}
|
||||
|
||||
/// Process-wide HTTP client, created on first use and shared by the
/// convenience entry points in this module.
static CLIENT: LazyLock<reqwest::Client> = LazyLock::new(reqwest::Client::new);
|
||||
|
||||
pub async fn search(
|
||||
@ -189,6 +216,11 @@ pub async fn search(
|
||||
search_with_client_and_engines(&CLIENT, &engines, query, progress_tx).await
|
||||
}
|
||||
|
||||
pub async fn autocomplete(query: &str) -> eyre::Result<Vec<String>> {
|
||||
let engines = Engine::all();
|
||||
autocomplete_with_client_and_engines(&CLIENT, &engines, query).await
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Response {
|
||||
pub search_results: Vec<SearchResult>,
|
||||
@ -276,3 +308,36 @@ fn merge_engine_responses(responses: HashMap<Engine, EngineResponse>) -> Respons
|
||||
featured_snippet,
|
||||
}
|
||||
}
|
||||
|
||||
pub struct AutocompleteResult {
|
||||
pub query: String,
|
||||
pub score: f64,
|
||||
}
|
||||
|
||||
fn merge_autocomplete_responses(responses: HashMap<Engine, Vec<String>>) -> Vec<String> {
|
||||
let mut autocomplete_results: Vec<AutocompleteResult> = Vec::new();
|
||||
|
||||
for (engine, response) in responses {
|
||||
for (result_index, autocomplete_result) in response.into_iter().enumerate() {
|
||||
// position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a score of 0.33, etc.
|
||||
let base_result_score = 1. / (result_index + 1) as f64;
|
||||
let result_score = base_result_score * engine.weight();
|
||||
|
||||
if let Some(existing_result) = autocomplete_results
|
||||
.iter_mut()
|
||||
.find(|r| r.query == autocomplete_result)
|
||||
{
|
||||
existing_result.score += result_score;
|
||||
} else {
|
||||
autocomplete_results.push(AutocompleteResult {
|
||||
query: autocomplete_result,
|
||||
score: result_score,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
autocomplete_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
|
||||
|
||||
autocomplete_results.into_iter().map(|r| r.query).collect()
|
||||
}
|
||||
|
@ -38,7 +38,7 @@ pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
|
||||
.unwrap_or_default();
|
||||
clean_url(url)
|
||||
})))
|
||||
.description(".b_caption > p, p.b_algoSlug"),
|
||||
.description(".b_caption > p, p.b_algoSlug, .b_caption .ipText"),
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -22,6 +22,6 @@ pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
|
||||
.result("#results > .snippet[data-pos]:not(.standalone)")
|
||||
.title(".url")
|
||||
.href("a")
|
||||
.description(".snippet-content"),
|
||||
.description(".snippet-content, .video-snippet > .snippet-description"),
|
||||
)
|
||||
}
|
||||
|
@ -45,6 +45,40 @@ pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
|
||||
)
|
||||
}
|
||||
|
||||
pub fn request_autocomplete(client: &reqwest::Client, query: &str) -> reqwest::RequestBuilder {
|
||||
client
|
||||
.get(
|
||||
Url::parse_with_params(
|
||||
"https://suggestqueries.google.com/complete/search",
|
||||
&[
|
||||
("output", "firefox"),
|
||||
("client", "firefox"),
|
||||
("hl", "US-en"),
|
||||
("q", query),
|
||||
],
|
||||
)
|
||||
.unwrap(),
|
||||
)
|
||||
.header(
|
||||
"User-Agent",
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
|
||||
)
|
||||
}
|
||||
|
||||
pub fn parse_autocomplete_response(body: &str) -> eyre::Result<Vec<String>> {
|
||||
let res = serde_json::from_str::<Vec<serde_json::Value>>(body)?;
|
||||
Ok(res
|
||||
.into_iter()
|
||||
.nth(1)
|
||||
.unwrap_or_default()
|
||||
.as_array()
|
||||
.cloned()
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(|v| v.as_str().unwrap_or_default().to_string())
|
||||
.collect())
|
||||
}
|
||||
|
||||
fn clean_url(url: &str) -> eyre::Result<String> {
|
||||
if url.starts_with("/url?q=") {
|
||||
// get the q param
|
||||
|
@ -12,6 +12,9 @@ pub fn normalize_url(url: &str) -> eyre::Result<String> {
|
||||
url.set_scheme("https").unwrap();
|
||||
}
|
||||
|
||||
// remove fragment
|
||||
url.set_fragment(None);
|
||||
|
||||
// remove trailing slash
|
||||
let path = url.path().to_string();
|
||||
if let Some(path) = path.strip_suffix('/') {
|
||||
|
@ -5,12 +5,13 @@
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>metasearch</title>
|
||||
<link rel="stylesheet" href="/style.css">
|
||||
<script src="/script.js" defer></script>
|
||||
</head>
|
||||
<body>
|
||||
<div class="main-container">
|
||||
<h1>metasearch</h1>
|
||||
<form action="/search" method="get">
|
||||
<input type="text" name="q" placeholder="Search" class="search-input" autofocus>
|
||||
<input type="text" name="q" placeholder="Search" id="search-input" autofocus onfocus="this.select()" autocomplete="off">
|
||||
<input type="submit" value="Search">
|
||||
</form>
|
||||
</div>
|
||||
|
23
src/web/assets/script.js
Normal file
23
src/web/assets/script.js
Normal file
@ -0,0 +1,23 @@
|
||||
// add a datalist after the search input
const searchInputEl = document.getElementById("search-input");
const datalistEl = document.createElement("datalist");
datalistEl.id = "search-input-datalist";
searchInputEl.setAttribute("list", datalistEl.id);
searchInputEl.insertAdjacentElement("afterend", datalistEl);

// Incremented on every input event so that a slow response for an older
// value can be recognised and dropped instead of overwriting newer options.
let latestRequestId = 0;

// update the datalist options on input
searchInputEl.addEventListener("input", async (e) => {
  const value = e.target.value;
  const requestId = ++latestRequestId;

  let options = [];
  try {
    // The query must be encoded, otherwise characters such as "&", "#" or
    // "+" change the meaning of the URL.
    const res = await fetch(`/autocomplete?q=${encodeURIComponent(value)}`);
    // the endpoint answers with a [query, suggestions] pair
    const body = await res.json();
    if (Array.isArray(body) && Array.isArray(body[1])) {
      options = body[1];
    }
  } catch (err) {
    // Network or JSON failure: fall through and show no suggestions.
  }

  // A newer keystroke has been handled since this request started.
  if (requestId !== latestRequestId) {
    return;
  }

  datalistEl.innerHTML = "";
  for (const option of options) {
    const optionEl = document.createElement("option");
    optionEl.value = option;
    datalistEl.appendChild(optionEl);
  }
});
|
@ -46,7 +46,7 @@ h1 {
|
||||
.search-form {
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
.search-input {
|
||||
#search-input {
|
||||
width: 20em;
|
||||
}
|
||||
|
||||
@ -111,3 +111,7 @@ h1 {
|
||||
.progress-update-time {
|
||||
opacity: 0.5;
|
||||
}
|
||||
.progress-update-done {
|
||||
color: #7fd962;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
24
src/web/autocomplete.rs
Normal file
24
src/web/autocomplete.rs
Normal file
@ -0,0 +1,24 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use axum::{extract::Query, http::StatusCode, response::IntoResponse, Json};
|
||||
|
||||
use crate::engines::{self, ProgressUpdate, ProgressUpdateKind, Response};
|
||||
|
||||
pub async fn route(Query(params): Query<HashMap<String, String>>) -> impl IntoResponse {
|
||||
let query = params
|
||||
.get("q")
|
||||
.cloned()
|
||||
.unwrap_or_default()
|
||||
.trim()
|
||||
.replace('\n', " ");
|
||||
|
||||
let res = match engines::autocomplete(&query).await {
|
||||
Ok(res) => res,
|
||||
Err(err) => {
|
||||
eprintln!("Error: {}", err);
|
||||
return (StatusCode::INTERNAL_SERVER_ERROR, Json((query, vec![])));
|
||||
}
|
||||
};
|
||||
|
||||
(StatusCode::OK, Json((query, res)))
|
||||
}
|
@ -1,3 +1,4 @@
|
||||
pub mod autocomplete;
|
||||
pub mod search;
|
||||
|
||||
use axum::{http::header, routing::get, Router};
|
||||
@ -24,7 +25,17 @@ pub async fn run() {
|
||||
)
|
||||
}),
|
||||
)
|
||||
.route("/search", get(search::route));
|
||||
.route(
|
||||
"/script.js",
|
||||
get(|| async {
|
||||
(
|
||||
[(header::CONTENT_TYPE, "text/javascript; charset=utf-8")],
|
||||
include_str!("assets/script.js"),
|
||||
)
|
||||
}),
|
||||
)
|
||||
.route("/search", get(search::route))
|
||||
.route("/autocomplete", get(autocomplete::route));
|
||||
|
||||
println!("Listening on {BIND_ADDRESS}");
|
||||
|
||||
|
@ -10,7 +10,7 @@ use axum::{
|
||||
use bytes::Bytes;
|
||||
use html_escape::{encode_text, encode_unquoted_attribute};
|
||||
|
||||
use crate::engines::{self, Response};
|
||||
use crate::engines::{self, ProgressUpdate, ProgressUpdateKind, Response};
|
||||
|
||||
fn render_beginning_of_html(query: &str) -> String {
|
||||
format!(
|
||||
@ -21,11 +21,12 @@ fn render_beginning_of_html(query: &str) -> String {
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>{} - metasearch</title>
|
||||
<link rel="stylesheet" href="/style.css">
|
||||
<script src="/script.js" defer></script>
|
||||
</head>
|
||||
<body>
|
||||
<main>
|
||||
<form action="/search" method="get" class="search-form">
|
||||
<input type="text" name="q" placeholder="Search" value="{}" class="search-input" autofocus>
|
||||
<input type="text" name="q" placeholder="Search" value="{}" id="search-input" autofocus onfocus="this.select()" autocomplete="off">
|
||||
<input type="submit" value="Search">
|
||||
</form>
|
||||
<div class="progress-updates">
|
||||
@ -44,7 +45,7 @@ fn render_engine_list(engines: &[engines::Engine]) -> String {
|
||||
for engine in engines {
|
||||
html.push_str(&format!(
|
||||
r#"<span class="engine-list-item">{engine}</span>"#,
|
||||
engine = encode_text(&engine.name())
|
||||
engine = encode_text(&engine.id())
|
||||
));
|
||||
}
|
||||
format!(r#"<div class="engine-list">{html}</div>"#)
|
||||
@ -99,6 +100,22 @@ fn render_results(response: Response) -> String {
|
||||
html
|
||||
}
|
||||
|
||||
fn render_progress_update(progress_update: &ProgressUpdate) -> String {
|
||||
let message: &str = match progress_update.kind {
|
||||
ProgressUpdateKind::Requesting => "requesting",
|
||||
ProgressUpdateKind::Downloading => "downloading",
|
||||
ProgressUpdateKind::Parsing => "parsing",
|
||||
ProgressUpdateKind::Done => "<span class=\"progress-update-done\">done</span>",
|
||||
};
|
||||
|
||||
format!(
|
||||
r#"<span class="progress-update-time">{time:>4}ms</span> {engine} {message}"#,
|
||||
time = progress_update.time,
|
||||
message = message,
|
||||
engine = progress_update.engine.id()
|
||||
)
|
||||
}
|
||||
|
||||
pub async fn route(Query(params): Query<HashMap<String, String>>) -> impl IntoResponse {
|
||||
let query = params
|
||||
.get("q")
|
||||
@ -129,7 +146,8 @@ pub async fn route(Query(params): Query<HashMap<String, String>>) -> impl IntoRe
|
||||
|
||||
while let Some(progress_update) = progress_rx.recv().await {
|
||||
let progress_html = format!(
|
||||
r#"<p class="progress-update">{progress_update}</p>"#
|
||||
r#"<p class="progress-update">{}</p>"#,
|
||||
render_progress_update(&progress_update)
|
||||
);
|
||||
yield R::Ok(Bytes::from(progress_html));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user