add instant answers

This commit is contained in:
mat 2023-12-20 04:08:36 -06:00
parent 359b8ae2d6
commit bcf42a733a
12 changed files with 211 additions and 37 deletions

1
Cargo.lock generated
View File

@ -794,6 +794,7 @@ dependencies = [
"eyre", "eyre",
"futures", "futures",
"html-escape", "html-escape",
"regex",
"reqwest", "reqwest",
"scraper", "scraper",
"serde_json", "serde_json",

View File

@ -14,6 +14,7 @@ bytes = "1.5.0"
eyre = "0.6.11" eyre = "0.6.11"
futures = "0.3.29" futures = "0.3.29"
html-escape = "0.2.13" html-escape = "0.2.13"
regex = "1.10.2"
reqwest = { version = "0.11.23", default-features = false, features = [ reqwest = { version = "0.11.23", default-features = false, features = [
"rustls-tls", "rustls-tls",
] } ] }

10
src/engines/answer.rs Normal file
View File

@ -0,0 +1,10 @@
pub mod ip;
pub mod useragent;
/// Compiles a regular expression literal once and yields a reference to it.
///
/// Every expansion owns a private `OnceLock`, so the pattern is parsed the
/// first time that call site runs and the compiled `regex::Regex` is reused
/// on later calls. A trailing comma after the literal is accepted.
///
/// # Panics
///
/// Panics on first use if the literal is not a valid regular expression.
macro_rules! regex {
    ($pattern:literal $(,)?) => {{
        static COMPILED: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
        COMPILED.get_or_init(|| {
            // An invalid literal is a programming error, so panicking is intended.
            regex::Regex::new($pattern).unwrap()
        })
    }};
}
pub(crate) use regex;

13
src/engines/answer/ip.rs Normal file
View File

@ -0,0 +1,13 @@
use crate::engines::{EngineResponse, SearchQuery};
use super::regex;
/// Instant answer that reports the requester's own IP address.
///
/// Fires when the lowercased query starts with "what's my ip", "whats my ip"
/// or "what is my ip" (also "ipv4"/"ipv6"), optionally followed by further
/// words such as "address". Any other query yields an empty
/// [`EngineResponse`].
///
/// `_client` is unused because the answer needs no network request.
pub fn request(_client: &reqwest::Client, query: &SearchQuery) -> EngineResponse {
    // The trailing `\b` stops longer words that merely begin with "ip"
    // ("iphone", "ipad", ...) from triggering the answer.
    let is_ip_question =
        regex!(r"^what('s|s| is) my ip(v[46])?\b").is_match(&query.query.to_lowercase());
    if !is_ip_question {
        return EngineResponse::new();
    }

    // NOTE(review): `ip` is inserted into HTML unescaped; this assumes it is
    // always a formatted socket address and never raw client input.
    let ip = &query.ip;
    EngineResponse::answer_html(format!("Your IP address is <b>{ip}</b>"))
}

View File

@ -0,0 +1,19 @@
use crate::engines::{EngineResponse, SearchQuery};
use super::regex;
/// Instant answer that reports the requester's `User-Agent` header.
///
/// Fires when the lowercased query either
/// * starts with "what's my" / "whats my" / "what is my" followed by
///   "user agent", "useragent" or "ua", or
/// * is exactly "ua", "user agent" or "useragent".
///
/// Any other query yields an empty [`EngineResponse`].
///
/// `_client` is unused because the answer needs no network request.
pub fn request(_client: &reqwest::Client, query: &SearchQuery) -> EngineResponse {
    // The bare keywords are grouped and anchored on both sides. Without the
    // group, `|` splits the whole pattern and a lone `ua` alternative matches
    // any query that merely contains "ua" (e.g. "language").
    let is_user_agent_question =
        regex!(r"^what('s|s| is) my (user ?agent|ua)\b|^(ua|user ?agent)$")
            .is_match(&query.query.to_lowercase());
    if !is_user_agent_question {
        return EngineResponse::new();
    }

    // NOTE(review): assumes header names are stored lowercased — confirm
    // against the code that builds `SearchQuery::request_headers`.
    let answer = match query.request_headers.get("user-agent") {
        // The header is fully client-controlled, so it must be escaped before
        // being embedded in the answer HTML.
        Some(user_agent) => format!(
            "Your user agent is <b>{}</b>",
            escape_html(user_agent)
        ),
        None => String::from("You don't have a user agent"),
    };
    EngineResponse::answer_html(answer)
}

/// Escapes the characters that are significant in HTML text and attribute values.
fn escape_html(raw: &str) -> String {
    let mut escaped = String::with_capacity(raw.len());
    for ch in raw.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#x27;"),
            _ => escaped.push(ch),
        }
    }
    escaped
}

View File

@ -1,5 +1,6 @@
use std::{ use std::{
collections::{BTreeSet, HashMap}, collections::{BTreeSet, HashMap},
ops::Deref,
sync::LazyLock, sync::LazyLock,
time::Instant, time::Instant,
}; };
@ -9,18 +10,29 @@ use tokio::sync::mpsc;
use self::search::{bing, brave, google}; use self::search::{bing, brave, google};
pub mod answer;
pub mod search; pub mod search;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum Engine { pub enum Engine {
// search
Google, Google,
Bing, Bing,
Brave, Brave,
// answer
Useragent,
Ip,
} }
impl Engine { impl Engine {
pub fn all() -> &'static [Engine] { pub fn all() -> &'static [Engine] {
&[Engine::Google, Engine::Bing, Engine::Brave] &[
Engine::Google,
Engine::Bing,
Engine::Brave,
Engine::Useragent,
Engine::Ip,
]
} }
pub fn id(&self) -> &'static str { pub fn id(&self) -> &'static str {
@ -28,6 +40,8 @@ impl Engine {
Engine::Google => "google", Engine::Google => "google",
Engine::Bing => "bing", Engine::Bing => "bing",
Engine::Brave => "brave", Engine::Brave => "brave",
Engine::Useragent => "useragent",
Engine::Ip => "ip",
} }
} }
@ -36,14 +50,17 @@ impl Engine {
Engine::Google => 1.05, Engine::Google => 1.05,
Engine::Bing => 1., Engine::Bing => 1.,
Engine::Brave => 1.25, Engine::Brave => 1.25,
_ => 1.,
} }
} }
pub fn request(&self, client: &reqwest::Client, query: &str) -> reqwest::RequestBuilder { pub fn request(&self, client: &reqwest::Client, query: &SearchQuery) -> RequestResponse {
match self { match self {
Engine::Google => google::request(client, query), Engine::Google => google::request(client, query).into(),
Engine::Bing => bing::request(client, query), Engine::Bing => bing::request(client, query).into(),
Engine::Brave => brave::request(client, query), Engine::Brave => search::brave::request(client, query).into(),
Engine::Useragent => answer::useragent::request(client, query).into(),
Engine::Ip => answer::ip::request(client, query).into(),
} }
} }
@ -52,6 +69,7 @@ impl Engine {
Engine::Google => google::parse_response(body), Engine::Google => google::parse_response(body),
Engine::Bing => bing::parse_response(body), Engine::Bing => bing::parse_response(body),
Engine::Brave => brave::parse_response(body), Engine::Brave => brave::parse_response(body),
_ => eyre::bail!("engine {self:?} can't parse response"),
} }
} }
@ -74,6 +92,36 @@ impl Engine {
} }
} }
/// A search request together with the request metadata that instant answers
/// may need.
#[derive(Debug, Clone)]
pub struct SearchQuery {
    /// The text the user searched for.
    pub query: String,
    /// Headers of the incoming HTTP request, keyed by header name.
    // NOTE(review): presumably names are lowercased and non-UTF-8 values are
    // stored as empty strings — confirm against the code that builds this map.
    pub request_headers: HashMap<String, String>,
    /// Textual form of the requester's IP address.
    pub ip: String,
}
/// Dereferences a `SearchQuery` to its query text, so a `&SearchQuery` can be
/// passed where a `&str` is expected.
impl Deref for SearchQuery {
    type Target = str;

    fn deref(&self) -> &str {
        self.query.as_str()
    }
}
/// What an engine hands back when asked to handle a query: either an HTTP
/// request that still has to be sent, or a response that is already complete.
pub enum RequestResponse {
/// A prepared HTTP request; it must be sent and its body parsed by the engine.
Http(reqwest::RequestBuilder),
/// A finished response that needs no network round trip.
Instant(EngineResponse),
}
impl From<reqwest::RequestBuilder> for RequestResponse {
fn from(req: reqwest::RequestBuilder) -> Self {
Self::Http(req)
}
}
impl From<EngineResponse> for RequestResponse {
fn from(res: EngineResponse) -> Self {
Self::Instant(res)
}
}
#[derive(Debug)] #[derive(Debug)]
pub struct EngineSearchResult { pub struct EngineSearchResult {
pub url: String, pub url: String,
@ -88,10 +136,24 @@ pub struct EngineFeaturedSnippet {
pub description: String, pub description: String,
} }
#[derive(Debug)] #[derive(Debug, Default)]
pub struct EngineResponse { pub struct EngineResponse {
pub search_results: Vec<EngineSearchResult>, pub search_results: Vec<EngineSearchResult>,
pub featured_snippet: Option<EngineFeaturedSnippet>, pub featured_snippet: Option<EngineFeaturedSnippet>,
pub answer_html: Option<String>,
}
impl EngineResponse {
pub fn new() -> Self {
Self::default()
}
pub fn answer_html(html: String) -> Self {
Self {
answer_html: Some(html),
..Default::default()
}
}
} }
#[derive(Debug)] #[derive(Debug)]
@ -122,7 +184,7 @@ impl ProgressUpdate {
pub async fn search_with_client_and_engines( pub async fn search_with_client_and_engines(
client: &reqwest::Client, client: &reqwest::Client,
engines: &[Engine], engines: &[Engine],
query: &str, query: &SearchQuery,
progress_tx: mpsc::UnboundedSender<ProgressUpdate>, progress_tx: mpsc::UnboundedSender<ProgressUpdate>,
) -> eyre::Result<Response> { ) -> eyre::Result<Response> {
let start_time = Instant::now(); let start_time = Instant::now();
@ -137,29 +199,38 @@ pub async fn search_with_client_and_engines(
start_time, start_time,
))?; ))?;
let res = engine.request(client, query).send().await?; let request_response = engine.request(client, query).into();
progress_tx.send(ProgressUpdate::new( let response = match request_response {
ProgressUpdateKind::Downloading, RequestResponse::Http(request) => {
engine, let res = request.send().await?;
start_time,
))?;
let body = res.text().await?; progress_tx.send(ProgressUpdate::new(
ProgressUpdateKind::Downloading,
engine,
start_time,
))?;
progress_tx.send(ProgressUpdate::new( let body = res.text().await?;
ProgressUpdateKind::Parsing,
engine,
start_time,
))?;
let response = engine.parse_response(&body)?; progress_tx.send(ProgressUpdate::new(
ProgressUpdateKind::Parsing,
engine,
start_time,
))?;
progress_tx.send(ProgressUpdate::new( let response = engine.parse_response(&body)?;
ProgressUpdateKind::Done,
engine, progress_tx.send(ProgressUpdate::new(
start_time, ProgressUpdateKind::Done,
))?; engine,
start_time,
))?;
response
}
RequestResponse::Instant(response) => response,
};
Ok((engine, response)) Ok((engine, response))
}); });
@ -209,11 +280,11 @@ pub async fn autocomplete_with_client_and_engines(
static CLIENT: LazyLock<reqwest::Client> = LazyLock::new(|| reqwest::Client::new()); static CLIENT: LazyLock<reqwest::Client> = LazyLock::new(|| reqwest::Client::new());
pub async fn search( pub async fn search(
query: &str, query: SearchQuery,
progress_tx: mpsc::UnboundedSender<ProgressUpdate>, progress_tx: mpsc::UnboundedSender<ProgressUpdate>,
) -> eyre::Result<Response> { ) -> eyre::Result<Response> {
let engines = Engine::all(); let engines = Engine::all();
search_with_client_and_engines(&CLIENT, &engines, query, progress_tx).await search_with_client_and_engines(&CLIENT, &engines, &query, progress_tx).await
} }
pub async fn autocomplete(query: &str) -> eyre::Result<Vec<String>> { pub async fn autocomplete(query: &str) -> eyre::Result<Vec<String>> {
@ -225,6 +296,7 @@ pub async fn autocomplete(query: &str) -> eyre::Result<Vec<String>> {
pub struct Response { pub struct Response {
pub search_results: Vec<SearchResult>, pub search_results: Vec<SearchResult>,
pub featured_snippet: Option<FeaturedSnippet>, pub featured_snippet: Option<FeaturedSnippet>,
pub answer: Option<Answer>,
} }
#[derive(Debug)] #[derive(Debug)]
@ -244,9 +316,16 @@ pub struct FeaturedSnippet {
pub engine: Engine, pub engine: Engine,
} }
/// The instant answer chosen for display, together with the engine that
/// produced it.
#[derive(Debug)]
pub struct Answer {
/// HTML markup of the answer. The results page embeds it without escaping,
/// so engines must escape any untrusted text before storing it here.
pub html: String,
/// The engine that produced this answer.
pub engine: Engine,
}
fn merge_engine_responses(responses: HashMap<Engine, EngineResponse>) -> Response { fn merge_engine_responses(responses: HashMap<Engine, EngineResponse>) -> Response {
let mut search_results: Vec<SearchResult> = Vec::new(); let mut search_results: Vec<SearchResult> = Vec::new();
let mut featured_snippet: Option<FeaturedSnippet> = None; let mut featured_snippet: Option<FeaturedSnippet> = None;
let mut answer: Option<Answer> = None;
for (engine, response) in responses { for (engine, response) in responses {
for (result_index, search_result) in response.search_results.into_iter().enumerate() { for (result_index, search_result) in response.search_results.into_iter().enumerate() {
@ -299,6 +378,17 @@ fn merge_engine_responses(responses: HashMap<Engine, EngineResponse>) -> Respons
}); });
} }
} }
if let Some(engine_answer_html) = response.answer_html {
// if it has a higher weight than the current answer
let answer_weight = answer.as_ref().map(|s| s.engine.weight()).unwrap_or(0.);
if engine.weight() > answer_weight {
answer = Some(Answer {
html: engine_answer_html,
engine,
});
}
}
} }
search_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap()); search_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
@ -306,6 +396,7 @@ fn merge_engine_responses(responses: HashMap<Engine, EngineResponse>) -> Respons
Response { Response {
search_results, search_results,
featured_snippet, featured_snippet,
answer,
} }
} }

View File

@ -176,5 +176,7 @@ pub(super) fn parse_html_response_with_opts(
Ok(EngineResponse { Ok(EngineResponse {
search_results, search_results,
featured_snippet, featured_snippet,
// this field is used by instant answers, not normal search engines
answer_html: None,
}) })
} }

View File

@ -12,8 +12,6 @@ searchInputEl.addEventListener("input", async (e) => {
const res = await fetch(`/autocomplete?q=${value}`).then((res) => res.json()); const res = await fetch(`/autocomplete?q=${value}`).then((res) => res.json());
const options = res[1]; const options = res[1];
console.log(options);
datalistEl.innerHTML = ""; datalistEl.innerHTML = "";
options.forEach((option) => { options.forEach((option) => {
const optionEl = document.createElement("option"); const optionEl = document.createElement("option");

View File

@ -97,6 +97,13 @@ h1 {
margin-bottom: 1rem; margin-bottom: 1rem;
} }
/* answer */
.answer {
margin-bottom: 1rem;
border: 1px solid #234;
padding: 0.5rem;
}
/* progress update */ /* progress update */
.progress-updates { .progress-updates {
margin-bottom: 1rem; margin-bottom: 1rem;

View File

@ -2,7 +2,7 @@ use std::collections::HashMap;
use axum::{extract::Query, http::StatusCode, response::IntoResponse, Json}; use axum::{extract::Query, http::StatusCode, response::IntoResponse, Json};
use crate::engines::{self, ProgressUpdate, ProgressUpdateKind, Response}; use crate::engines;
pub async fn route(Query(params): Query<HashMap<String, String>>) -> impl IntoResponse { pub async fn route(Query(params): Query<HashMap<String, String>>) -> impl IntoResponse {
let query = params let query = params

View File

@ -1,6 +1,8 @@
pub mod autocomplete; pub mod autocomplete;
pub mod search; pub mod search;
use std::net::SocketAddr;
use axum::{http::header, routing::get, Router}; use axum::{http::header, routing::get, Router};
pub const BIND_ADDRESS: &str = "[::]:3000"; pub const BIND_ADDRESS: &str = "[::]:3000";
@ -40,5 +42,10 @@ pub async fn run() {
println!("Listening on {BIND_ADDRESS}"); println!("Listening on {BIND_ADDRESS}");
let listener = tokio::net::TcpListener::bind(BIND_ADDRESS).await.unwrap(); let listener = tokio::net::TcpListener::bind(BIND_ADDRESS).await.unwrap();
axum::serve(listener, app).await.unwrap(); axum::serve(
listener,
app.into_make_service_with_connect_info::<SocketAddr>(),
)
.await
.unwrap();
} }

View File

@ -1,16 +1,16 @@
use std::collections::HashMap; use std::{collections::HashMap, net::SocketAddr};
use async_stream::stream; use async_stream::stream;
use axum::{ use axum::{
body::Body, body::Body,
extract::Query, extract::{ConnectInfo, Query},
http::{header, StatusCode}, http::{header, HeaderMap, StatusCode},
response::IntoResponse, response::IntoResponse,
}; };
use bytes::Bytes; use bytes::Bytes;
use html_escape::{encode_text, encode_unquoted_attribute}; use html_escape::{encode_text, encode_unquoted_attribute};
use crate::engines::{self, ProgressUpdate, ProgressUpdateKind, Response}; use crate::engines::{self, ProgressUpdate, ProgressUpdateKind, Response, SearchQuery};
fn render_beginning_of_html(query: &str) -> String { fn render_beginning_of_html(query: &str) -> String {
format!( format!(
@ -91,6 +91,13 @@ fn render_featured_snippet(featured_snippet: &engines::FeaturedSnippet) -> Strin
fn render_results(response: Response) -> String { fn render_results(response: Response) -> String {
let mut html = String::new(); let mut html = String::new();
if let Some(answer) = response.answer {
html.push_str(&format!(
r#"<div class="answer">{answer_html}{engines_html}</div>"#,
answer_html = &answer.html,
engines_html = render_engine_list(&[answer.engine])
));
}
if let Some(featured_snippet) = response.featured_snippet { if let Some(featured_snippet) = response.featured_snippet {
html.push_str(&render_featured_snippet(&featured_snippet)); html.push_str(&render_featured_snippet(&featured_snippet));
} }
@ -116,7 +123,11 @@ fn render_progress_update(progress_update: &ProgressUpdate) -> String {
) )
} }
pub async fn route(Query(params): Query<HashMap<String, String>>) -> impl IntoResponse { pub async fn route(
Query(params): Query<HashMap<String, String>>,
headers: HeaderMap,
ConnectInfo(addr): ConnectInfo<SocketAddr>,
) -> impl IntoResponse {
let query = params let query = params
.get("q") .get("q")
.cloned() .cloned()
@ -135,6 +146,20 @@ pub async fn route(Query(params): Query<HashMap<String, String>>) -> impl IntoRe
); );
} }
let query = SearchQuery {
query,
request_headers: headers
.into_iter()
.map(|(k, v)| {
(
k.map(|k| k.to_string()).unwrap_or_default(),
v.to_str().unwrap_or_default().to_string(),
)
})
.collect(),
ip: addr.ip().to_string(),
};
let s = stream! { let s = stream! {
type R = Result<Bytes, eyre::Error>; type R = Result<Bytes, eyre::Error>;
@ -142,7 +167,7 @@ pub async fn route(Query(params): Query<HashMap<String, String>>) -> impl IntoRe
let (progress_tx, mut progress_rx) = tokio::sync::mpsc::unbounded_channel(); let (progress_tx, mut progress_rx) = tokio::sync::mpsc::unbounded_channel();
let search_future = tokio::spawn(async move { engines::search(&query, progress_tx).await }); let search_future = tokio::spawn(async move { engines::search(query, progress_tx).await });
while let Some(progress_update) = progress_rx.recv().await { while let Some(progress_update) = progress_rx.recv().await {
let progress_html = format!( let progress_html = format!(