add instant answers

This commit is contained in:
mat 2023-12-20 04:08:36 -06:00
parent 359b8ae2d6
commit bcf42a733a
12 changed files with 211 additions and 37 deletions

1
Cargo.lock generated
View File

@ -794,6 +794,7 @@ dependencies = [
"eyre",
"futures",
"html-escape",
"regex",
"reqwest",
"scraper",
"serde_json",

View File

@ -14,6 +14,7 @@ bytes = "1.5.0"
eyre = "0.6.11"
futures = "0.3.29"
html-escape = "0.2.13"
regex = "1.10.2"
reqwest = { version = "0.11.23", default-features = false, features = [
"rustls-tls",
] }

10
src/engines/answer.rs Normal file
View File

@ -0,0 +1,10 @@
pub mod ip;
pub mod useragent;
/// Expands to a reference to a [`regex::Regex`] built from the given pattern
/// literal.
///
/// Each expansion owns its own hidden `static`, so the pattern is compiled the
/// first time that call site is reached and reused afterwards. An invalid
/// pattern panics on first use.
macro_rules! regex {
    ($pattern:literal $(,)?) => {{
        static COMPILED: std::sync::LazyLock<regex::Regex> =
            std::sync::LazyLock::new(|| regex::Regex::new($pattern).unwrap());
        &*COMPILED
    }};
}
pub(crate) use regex;

13
src/engines/answer/ip.rs Normal file
View File

@ -0,0 +1,13 @@
use crate::engines::{EngineResponse, SearchQuery};
use super::regex;
/// Instant answer that reports the IP address stored on the query.
///
/// Produces an answer only when the lowercased query starts with
/// "what's my ip", "whats my ip" or "what is my ip"; every other query gets an
/// empty response. The HTTP client is unused because nothing is fetched.
pub fn request(_client: &reqwest::Client, query: &SearchQuery) -> EngineResponse {
    let lowered = query.query.to_lowercase();
    let asks_for_ip = regex!("^what('s|s| is) my ip").is_match(&lowered);

    if asks_for_ip {
        EngineResponse::answer_html(format!(
            "Your IP address is <b>{ip}</b>",
            ip = query.ip
        ))
    } else {
        EngineResponse::new()
    }
}

View File

@ -0,0 +1,19 @@
use crate::engines::{EngineResponse, SearchQuery};
use super::regex;
/// Instant answer that echoes back the `User-Agent` header of the request.
///
/// Produces an answer when the lowercased query either starts with
/// "what's my / whats my / what is my" followed by "user agent", "useragent"
/// or "ua", or is exactly "ua", "user agent" or "useragent". Every other query
/// gets an empty response. The HTTP client is unused because nothing is
/// fetched.
pub fn request(_client: &reqwest::Client, query: &SearchQuery) -> EngineResponse {
    // `|` binds looser than the anchors, so each branch needs its own `^`/`$`.
    // Without them a bare `ua` branch matches anywhere in the query (for
    // example "guacamole" or "language"). `\b` keeps the question form from
    // matching longer words that merely begin with "ua".
    let is_user_agent_query =
        regex!(r"^what('s|s| is) my (user ?agent|ua)\b|^(ua|user ?agent)$")
            .is_match(&query.query.to_lowercase());
    if !is_user_agent_query {
        return EngineResponse::new();
    }

    let answer = match query.request_headers.get("user-agent") {
        Some(user_agent) => {
            // The header value is chosen by the client and the answer is
            // emitted as raw HTML, so escape the characters that are
            // significant in HTML text. `&` must be replaced first so the
            // entities produced below are not escaped a second time.
            let escaped = user_agent
                .replace('&', "&amp;")
                .replace('<', "&lt;")
                .replace('>', "&gt;");
            format!("Your user agent is <b>{escaped}</b>")
        }
        None => "You don't have a user agent".to_string(),
    };

    EngineResponse::answer_html(answer)
}

View File

@ -1,5 +1,6 @@
use std::{
collections::{BTreeSet, HashMap},
ops::Deref,
sync::LazyLock,
time::Instant,
};
@ -9,18 +10,29 @@ use tokio::sync::mpsc;
use self::search::{bing, brave, google};
pub mod answer;
pub mod search;
/// Every backend that can contribute to a response.
///
/// The first group are search engines; the second group are "answer" engines,
/// which live under the `answer` module.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum Engine {
// search
Google,
Bing,
Brave,
// answer
/// Answer engine backed by `answer::useragent`.
Useragent,
/// Answer engine backed by `answer::ip`.
Ip,
}
impl Engine {
pub fn all() -> &'static [Engine] {
&[Engine::Google, Engine::Bing, Engine::Brave]
&[
Engine::Google,
Engine::Bing,
Engine::Brave,
Engine::Useragent,
Engine::Ip,
]
}
pub fn id(&self) -> &'static str {
@ -28,6 +40,8 @@ impl Engine {
Engine::Google => "google",
Engine::Bing => "bing",
Engine::Brave => "brave",
Engine::Useragent => "useragent",
Engine::Ip => "ip",
}
}
@ -36,14 +50,17 @@ impl Engine {
Engine::Google => 1.05,
Engine::Bing => 1.,
Engine::Brave => 1.25,
_ => 1.,
}
}
pub fn request(&self, client: &reqwest::Client, query: &str) -> reqwest::RequestBuilder {
pub fn request(&self, client: &reqwest::Client, query: &SearchQuery) -> RequestResponse {
match self {
Engine::Google => google::request(client, query),
Engine::Bing => bing::request(client, query),
Engine::Brave => brave::request(client, query),
Engine::Google => google::request(client, query).into(),
Engine::Bing => bing::request(client, query).into(),
Engine::Brave => search::brave::request(client, query).into(),
Engine::Useragent => answer::useragent::request(client, query).into(),
Engine::Ip => answer::ip::request(client, query).into(),
}
}
@ -52,6 +69,7 @@ impl Engine {
Engine::Google => google::parse_response(body),
Engine::Bing => bing::parse_response(body),
Engine::Brave => brave::parse_response(body),
_ => eyre::bail!("engine {self:?} can't parse response"),
}
}
@ -74,6 +92,36 @@ impl Engine {
}
}
/// A search query together with the request metadata that answer engines
/// may need.
///
/// Dereferences to the query text, so code that only needs the query as a
/// `&str` can take `&SearchQuery` directly.
#[derive(Debug, Clone)]
pub struct SearchQuery {
    /// The query text as entered by the user.
    pub query: String,
    /// Request headers as name/value string pairs.
    pub request_headers: HashMap<String, String>,
    /// Textual form of the client's IP address.
    pub ip: String,
}

impl Deref for SearchQuery {
    type Target = str;

    /// Borrows the query text.
    fn deref(&self) -> &Self::Target {
        &self.query
    }
}
/// What an engine hands back when asked to handle a query: either an HTTP
/// request that still has to be sent, or a response that is already complete.
pub enum RequestResponse {
    /// A prepared request; the caller sends it and parses the reply.
    Http(reqwest::RequestBuilder),
    /// A ready-made response that needs no network round trip.
    Instant(EngineResponse),
}

impl From<reqwest::RequestBuilder> for RequestResponse {
    /// Wraps a prepared request in the `Http` variant.
    fn from(builder: reqwest::RequestBuilder) -> Self {
        RequestResponse::Http(builder)
    }
}

impl From<EngineResponse> for RequestResponse {
    /// Wraps a finished response in the `Instant` variant.
    fn from(instant: EngineResponse) -> Self {
        RequestResponse::Instant(instant)
    }
}
#[derive(Debug)]
pub struct EngineSearchResult {
pub url: String,
@ -88,10 +136,24 @@ pub struct EngineFeaturedSnippet {
pub description: String,
}
#[derive(Debug)]
#[derive(Debug, Default)]
/// Everything a single engine produced for one query.
pub struct EngineResponse {
/// Search results returned by this engine.
pub search_results: Vec<EngineSearchResult>,
/// Featured snippet reported by this engine, if any.
pub featured_snippet: Option<EngineFeaturedSnippet>,
/// HTML of an instant answer. Set by answer engines; the HTML search-engine
/// parser leaves it as `None`.
pub answer_html: Option<String>,
}
impl EngineResponse {
    /// Creates a response with every field at its default value.
    pub fn new() -> Self {
        Default::default()
    }

    /// Creates a response whose only populated field is the instant-answer
    /// HTML.
    pub fn answer_html(html: String) -> Self {
        let mut response = Self::new();
        response.answer_html = Some(html);
        response
    }
}
#[derive(Debug)]
@ -122,7 +184,7 @@ impl ProgressUpdate {
pub async fn search_with_client_and_engines(
client: &reqwest::Client,
engines: &[Engine],
query: &str,
query: &SearchQuery,
progress_tx: mpsc::UnboundedSender<ProgressUpdate>,
) -> eyre::Result<Response> {
let start_time = Instant::now();
@ -137,7 +199,11 @@ pub async fn search_with_client_and_engines(
start_time,
))?;
let res = engine.request(client, query).send().await?;
let request_response = engine.request(client, query).into();
let response = match request_response {
RequestResponse::Http(request) => {
let res = request.send().await?;
progress_tx.send(ProgressUpdate::new(
ProgressUpdateKind::Downloading,
@ -161,6 +227,11 @@ pub async fn search_with_client_and_engines(
start_time,
))?;
response
}
RequestResponse::Instant(response) => response,
};
Ok((engine, response))
});
}
@ -209,11 +280,11 @@ pub async fn autocomplete_with_client_and_engines(
static CLIENT: LazyLock<reqwest::Client> = LazyLock::new(|| reqwest::Client::new());
pub async fn search(
query: &str,
query: SearchQuery,
progress_tx: mpsc::UnboundedSender<ProgressUpdate>,
) -> eyre::Result<Response> {
let engines = Engine::all();
search_with_client_and_engines(&CLIENT, &engines, query, progress_tx).await
search_with_client_and_engines(&CLIENT, &engines, &query, progress_tx).await
}
pub async fn autocomplete(query: &str) -> eyre::Result<Vec<String>> {
@ -225,6 +296,7 @@ pub async fn autocomplete(query: &str) -> eyre::Result<Vec<String>> {
/// The merged result of all engine responses for one query.
pub struct Response {
/// Merged search results.
pub search_results: Vec<SearchResult>,
/// The featured snippet selected during merging, if any.
pub featured_snippet: Option<FeaturedSnippet>,
/// The instant answer selected during merging, if any engine produced one.
pub answer: Option<Answer>,
}
#[derive(Debug)]
@ -244,9 +316,16 @@ pub struct FeaturedSnippet {
pub engine: Engine,
}
/// An instant answer chosen for display, tagged with the engine it came from.
#[derive(Debug)]
pub struct Answer {
/// HTML of the answer. NOTE(review): the visible `render_results` hunk
/// inserts this into the page as-is, so producers must escape any untrusted
/// text they embed.
pub html: String,
/// The engine that produced this answer.
pub engine: Engine,
}
fn merge_engine_responses(responses: HashMap<Engine, EngineResponse>) -> Response {
let mut search_results: Vec<SearchResult> = Vec::new();
let mut featured_snippet: Option<FeaturedSnippet> = None;
let mut answer: Option<Answer> = None;
for (engine, response) in responses {
for (result_index, search_result) in response.search_results.into_iter().enumerate() {
@ -299,6 +378,17 @@ fn merge_engine_responses(responses: HashMap<Engine, EngineResponse>) -> Respons
});
}
}
if let Some(engine_answer_html) = response.answer_html {
// if it has a higher weight than the current answer
let answer_weight = answer.as_ref().map(|s| s.engine.weight()).unwrap_or(0.);
if engine.weight() > answer_weight {
answer = Some(Answer {
html: engine_answer_html,
engine,
});
}
}
}
search_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
@ -306,6 +396,7 @@ fn merge_engine_responses(responses: HashMap<Engine, EngineResponse>) -> Respons
Response {
search_results,
featured_snippet,
answer,
}
}

View File

@ -176,5 +176,7 @@ pub(super) fn parse_html_response_with_opts(
Ok(EngineResponse {
search_results,
featured_snippet,
// this field is used by instant answers, not normal search engines
answer_html: None,
})
}

View File

@ -12,8 +12,6 @@ searchInputEl.addEventListener("input", async (e) => {
const res = await fetch(`/autocomplete?q=${value}`).then((res) => res.json());
const options = res[1];
console.log(options);
datalistEl.innerHTML = "";
options.forEach((option) => {
const optionEl = document.createElement("option");

View File

@ -97,6 +97,13 @@ h1 {
margin-bottom: 1rem;
}
/* answer */
/* Bordered box wrapping an instant answer and the list of engines that produced it. */
.answer {
margin-bottom: 1rem;
border: 1px solid #234;
padding: 0.5rem;
}
/* progress update */
.progress-updates {
margin-bottom: 1rem;

View File

@ -2,7 +2,7 @@ use std::collections::HashMap;
use axum::{extract::Query, http::StatusCode, response::IntoResponse, Json};
use crate::engines::{self, ProgressUpdate, ProgressUpdateKind, Response};
use crate::engines;
pub async fn route(Query(params): Query<HashMap<String, String>>) -> impl IntoResponse {
let query = params

View File

@ -1,6 +1,8 @@
pub mod autocomplete;
pub mod search;
use std::net::SocketAddr;
use axum::{http::header, routing::get, Router};
pub const BIND_ADDRESS: &str = "[::]:3000";
@ -40,5 +42,10 @@ pub async fn run() {
println!("Listening on {BIND_ADDRESS}");
let listener = tokio::net::TcpListener::bind(BIND_ADDRESS).await.unwrap();
axum::serve(listener, app).await.unwrap();
axum::serve(
listener,
app.into_make_service_with_connect_info::<SocketAddr>(),
)
.await
.unwrap();
}

View File

@ -1,16 +1,16 @@
use std::collections::HashMap;
use std::{collections::HashMap, net::SocketAddr};
use async_stream::stream;
use axum::{
body::Body,
extract::Query,
http::{header, StatusCode},
extract::{ConnectInfo, Query},
http::{header, HeaderMap, StatusCode},
response::IntoResponse,
};
use bytes::Bytes;
use html_escape::{encode_text, encode_unquoted_attribute};
use crate::engines::{self, ProgressUpdate, ProgressUpdateKind, Response};
use crate::engines::{self, ProgressUpdate, ProgressUpdateKind, Response, SearchQuery};
fn render_beginning_of_html(query: &str) -> String {
format!(
@ -91,6 +91,13 @@ fn render_featured_snippet(featured_snippet: &engines::FeaturedSnippet) -> Strin
fn render_results(response: Response) -> String {
let mut html = String::new();
if let Some(answer) = response.answer {
html.push_str(&format!(
r#"<div class="answer">{answer_html}{engines_html}</div>"#,
answer_html = &answer.html,
engines_html = render_engine_list(&[answer.engine])
));
}
if let Some(featured_snippet) = response.featured_snippet {
html.push_str(&render_featured_snippet(&featured_snippet));
}
@ -116,7 +123,11 @@ fn render_progress_update(progress_update: &ProgressUpdate) -> String {
)
}
pub async fn route(Query(params): Query<HashMap<String, String>>) -> impl IntoResponse {
pub async fn route(
Query(params): Query<HashMap<String, String>>,
headers: HeaderMap,
ConnectInfo(addr): ConnectInfo<SocketAddr>,
) -> impl IntoResponse {
let query = params
.get("q")
.cloned()
@ -135,6 +146,20 @@ pub async fn route(Query(params): Query<HashMap<String, String>>) -> impl IntoRe
);
}
let query = SearchQuery {
query,
request_headers: headers
.into_iter()
.map(|(k, v)| {
(
k.map(|k| k.to_string()).unwrap_or_default(),
v.to_str().unwrap_or_default().to_string(),
)
})
.collect(),
ip: addr.ip().to_string(),
};
let s = stream! {
type R = Result<Bytes, eyre::Error>;
@ -142,7 +167,7 @@ pub async fn route(Query(params): Query<HashMap<String, String>>) -> impl IntoRe
let (progress_tx, mut progress_rx) = tokio::sync::mpsc::unbounded_channel();
let search_future = tokio::spawn(async move { engines::search(&query, progress_tx).await });
let search_future = tokio::spawn(async move { engines::search(query, progress_tx).await });
while let Some(progress_update) = progress_rx.recv().await {
let progress_html = format!(