2023-12-20 10:08:36 +00:00
|
|
|
use std::{collections::HashMap, net::SocketAddr};
|
2023-12-20 06:18:09 +00:00
|
|
|
|
|
|
|
use async_stream::stream;
|
|
|
|
use axum::{
|
|
|
|
body::Body,
|
2023-12-20 10:08:36 +00:00
|
|
|
extract::{ConnectInfo, Query},
|
|
|
|
http::{header, HeaderMap, StatusCode},
|
2023-12-20 06:18:09 +00:00
|
|
|
response::IntoResponse,
|
|
|
|
};
|
|
|
|
use bytes::Bytes;
|
|
|
|
use html_escape::{encode_text, encode_unquoted_attribute};
|
|
|
|
|
2023-12-21 04:18:43 +00:00
|
|
|
use crate::engines::{
|
2023-12-21 05:17:39 +00:00
|
|
|
self, Engine, EngineProgressUpdate, ProgressUpdateData, Response, SearchQuery,
|
2023-12-21 04:18:43 +00:00
|
|
|
};
|
2023-12-20 06:18:09 +00:00
|
|
|
|
|
|
|
fn render_beginning_of_html(query: &str) -> String {
|
|
|
|
format!(
|
|
|
|
r#"<!DOCTYPE html>
|
|
|
|
<html lang="en">
|
|
|
|
<head>
|
|
|
|
<meta charset="UTF-8">
|
|
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
|
|
<title>{} - metasearch</title>
|
|
|
|
<link rel="stylesheet" href="/style.css">
|
2023-12-20 09:28:38 +00:00
|
|
|
<script src="/script.js" defer></script>
|
2023-12-20 23:56:03 +00:00
|
|
|
<link rel="search" type="application/opensearchdescription+xml" title="metasearch" href="/opensearch.xml"/>
|
2023-12-20 06:18:09 +00:00
|
|
|
</head>
|
|
|
|
<body>
|
2023-12-21 04:18:43 +00:00
|
|
|
<div class="results-container">
|
2023-12-20 06:18:09 +00:00
|
|
|
<main>
|
|
|
|
<form action="/search" method="get" class="search-form">
|
2023-12-20 09:28:38 +00:00
|
|
|
<input type="text" name="q" placeholder="Search" value="{}" id="search-input" autofocus onfocus="this.select()" autocomplete="off">
|
2023-12-20 06:18:09 +00:00
|
|
|
<input type="submit" value="Search">
|
|
|
|
</form>
|
|
|
|
<div class="progress-updates">
|
|
|
|
"#,
|
|
|
|
encode_text(query),
|
|
|
|
encode_unquoted_attribute(query)
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns the closing tags for the `<main>`, results container, `<body>` and
/// `<html>` elements opened by `render_beginning_of_html`.
fn render_end_of_html() -> String {
    String::from(r#"</main></div></body></html>"#)
}
|
|
|
|
|
2023-12-20 08:03:29 +00:00
|
|
|
fn render_engine_list(engines: &[engines::Engine]) -> String {
|
|
|
|
let mut html = String::new();
|
|
|
|
for engine in engines {
|
|
|
|
html.push_str(&format!(
|
|
|
|
r#"<span class="engine-list-item">{engine}</span>"#,
|
2023-12-20 09:28:38 +00:00
|
|
|
engine = encode_text(&engine.id())
|
2023-12-20 08:03:29 +00:00
|
|
|
));
|
|
|
|
}
|
|
|
|
format!(r#"<div class="engine-list">{html}</div>"#)
|
|
|
|
}
|
2023-12-20 06:18:09 +00:00
|
|
|
|
2023-12-20 08:03:29 +00:00
|
|
|
fn render_search_result(result: &engines::SearchResult) -> String {
|
2023-12-20 06:18:09 +00:00
|
|
|
format!(
|
|
|
|
r#"<div class="search-result">
|
|
|
|
<a class="search-result-anchor" href="{url_attr}">
|
|
|
|
<span class="search-result-url" href="{url_attr}">{url}</span>
|
|
|
|
<h3 class="search-result-title">{title}</h3>
|
|
|
|
</a>
|
|
|
|
<p class="search-result-description">{desc}</p>
|
2023-12-20 08:03:29 +00:00
|
|
|
{engines_html}
|
2023-12-20 06:18:09 +00:00
|
|
|
</div>
|
|
|
|
"#,
|
|
|
|
url_attr = encode_unquoted_attribute(&result.url),
|
|
|
|
url = encode_text(&result.url),
|
|
|
|
title = encode_text(&result.title),
|
2023-12-20 08:03:29 +00:00
|
|
|
desc = encode_text(&result.description),
|
|
|
|
engines_html = render_engine_list(&result.engines.iter().copied().collect::<Vec<_>>())
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn render_featured_snippet(featured_snippet: &engines::FeaturedSnippet) -> String {
|
|
|
|
format!(
|
|
|
|
r#"<div class="featured-snippet">
|
|
|
|
<p class="search-result-description">{desc}</p>
|
|
|
|
<a class="search-result-anchor" href="{url_attr}">
|
|
|
|
<span class="search-result-url" href="{url_attr}">{url}</span>
|
|
|
|
<h3 class="search-result-title">{title}</h3>
|
|
|
|
</a>
|
|
|
|
{engines_html}
|
|
|
|
</div>
|
|
|
|
"#,
|
|
|
|
desc = encode_text(&featured_snippet.description),
|
|
|
|
url_attr = encode_unquoted_attribute(&featured_snippet.url),
|
|
|
|
url = encode_text(&featured_snippet.url),
|
|
|
|
title = encode_text(&featured_snippet.title),
|
|
|
|
engines_html = render_engine_list(&[featured_snippet.engine])
|
2023-12-20 06:18:09 +00:00
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2023-12-20 08:03:29 +00:00
|
|
|
fn render_results(response: Response) -> String {
|
|
|
|
let mut html = String::new();
|
2023-12-21 04:18:43 +00:00
|
|
|
if let Some(infobox) = response.infobox {
|
|
|
|
html.push_str(&format!(
|
|
|
|
r#"<div class="infobox">{infobox_html}{engines_html}</div>"#,
|
|
|
|
infobox_html = &infobox.html,
|
|
|
|
engines_html = render_engine_list(&[infobox.engine])
|
|
|
|
));
|
|
|
|
}
|
|
|
|
|
2023-12-20 10:08:36 +00:00
|
|
|
if let Some(answer) = response.answer {
|
|
|
|
html.push_str(&format!(
|
|
|
|
r#"<div class="answer">{answer_html}{engines_html}</div>"#,
|
|
|
|
answer_html = &answer.html,
|
|
|
|
engines_html = render_engine_list(&[answer.engine])
|
|
|
|
));
|
|
|
|
}
|
2023-12-20 08:03:29 +00:00
|
|
|
if let Some(featured_snippet) = response.featured_snippet {
|
|
|
|
html.push_str(&render_featured_snippet(&featured_snippet));
|
|
|
|
}
|
|
|
|
for result in &response.search_results {
|
|
|
|
html.push_str(&render_search_result(result));
|
|
|
|
}
|
|
|
|
html
|
|
|
|
}
|
|
|
|
|
2023-12-21 04:18:43 +00:00
|
|
|
fn render_engine_progress_update(
|
|
|
|
engine: Engine,
|
|
|
|
progress_update: &EngineProgressUpdate,
|
|
|
|
time_ms: u64,
|
|
|
|
) -> String {
|
|
|
|
let message = match progress_update {
|
|
|
|
EngineProgressUpdate::Requesting => "requesting",
|
|
|
|
EngineProgressUpdate::Downloading => "downloading",
|
|
|
|
EngineProgressUpdate::Parsing => "parsing",
|
|
|
|
EngineProgressUpdate::Done => "<span class=\"progress-update-done\">done</span>",
|
2023-12-20 09:28:38 +00:00
|
|
|
};
|
|
|
|
|
2023-12-21 04:18:43 +00:00
|
|
|
format!(r#"<span class="progress-update-time">{time_ms:>4}ms</span> {engine} {message}"#)
|
2023-12-20 09:28:38 +00:00
|
|
|
}
|
|
|
|
|
2023-12-20 10:08:36 +00:00
|
|
|
pub async fn route(
|
|
|
|
Query(params): Query<HashMap<String, String>>,
|
|
|
|
headers: HeaderMap,
|
|
|
|
ConnectInfo(addr): ConnectInfo<SocketAddr>,
|
|
|
|
) -> impl IntoResponse {
|
2023-12-20 06:18:09 +00:00
|
|
|
let query = params
|
|
|
|
.get("q")
|
|
|
|
.cloned()
|
|
|
|
.unwrap_or_default()
|
|
|
|
.trim()
|
|
|
|
.replace('\n', " ");
|
|
|
|
if query.is_empty() {
|
|
|
|
// redirect to index
|
|
|
|
return (
|
|
|
|
StatusCode::FOUND,
|
|
|
|
[
|
|
|
|
(header::LOCATION, "/"),
|
|
|
|
(header::CONTENT_TYPE, "text/html; charset=utf-8"),
|
|
|
|
],
|
|
|
|
Body::from("<a href=\"/\">No query provided, click here to go back to index</a>"),
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2023-12-20 10:08:36 +00:00
|
|
|
let query = SearchQuery {
|
|
|
|
query,
|
|
|
|
request_headers: headers
|
2023-12-20 23:56:03 +00:00
|
|
|
.clone()
|
2023-12-20 10:08:36 +00:00
|
|
|
.into_iter()
|
|
|
|
.map(|(k, v)| {
|
|
|
|
(
|
|
|
|
k.map(|k| k.to_string()).unwrap_or_default(),
|
|
|
|
v.to_str().unwrap_or_default().to_string(),
|
|
|
|
)
|
|
|
|
})
|
|
|
|
.collect(),
|
2023-12-20 23:56:03 +00:00
|
|
|
ip: headers
|
|
|
|
// this could be exploited under some setups, but the ip is only used for the
|
|
|
|
// "what is my ip" answer so it doesn't really matter
|
|
|
|
.get("x-forwarded-for")
|
|
|
|
.map(|ip| ip.to_str().unwrap_or_default().to_string())
|
|
|
|
.unwrap_or_else(|| addr.ip().to_string()),
|
2023-12-20 10:08:36 +00:00
|
|
|
};
|
|
|
|
|
2023-12-20 06:18:09 +00:00
|
|
|
let s = stream! {
|
|
|
|
type R = Result<Bytes, eyre::Error>;
|
|
|
|
|
2023-12-21 04:18:43 +00:00
|
|
|
// the html is sent in three chunks (technically more if you count progress updates):
|
|
|
|
// 1) the beginning of the html, including the search bar
|
|
|
|
// 1.5) the progress updates
|
|
|
|
// 2) the results
|
|
|
|
// 3) the post-search infobox (usually not sent) + the end of the html
|
|
|
|
|
|
|
|
let first_part = render_beginning_of_html(&query);
|
|
|
|
// second part is in the loop
|
|
|
|
let mut third_part = String::new();
|
|
|
|
|
|
|
|
yield R::Ok(Bytes::from(first_part));
|
2023-12-20 06:18:09 +00:00
|
|
|
|
|
|
|
let (progress_tx, mut progress_rx) = tokio::sync::mpsc::unbounded_channel();
|
|
|
|
|
2023-12-20 10:08:36 +00:00
|
|
|
let search_future = tokio::spawn(async move { engines::search(query, progress_tx).await });
|
2023-12-20 06:18:09 +00:00
|
|
|
|
|
|
|
while let Some(progress_update) = progress_rx.recv().await {
|
2023-12-21 04:18:43 +00:00
|
|
|
match progress_update.data {
|
|
|
|
ProgressUpdateData::Engine { engine, update } => {
|
|
|
|
let progress_html = format!(
|
|
|
|
r#"<p class="progress-update">{}</p>"#,
|
|
|
|
render_engine_progress_update(engine, &update, progress_update.time_ms)
|
|
|
|
);
|
|
|
|
yield R::Ok(Bytes::from(progress_html));
|
|
|
|
},
|
|
|
|
ProgressUpdateData::Response(results) => {
|
|
|
|
let mut second_part = String::new();
|
|
|
|
|
|
|
|
second_part.push_str("</div>"); // close progress-updates
|
|
|
|
second_part.push_str("<style>.progress-updates{display:none}</style>");
|
|
|
|
second_part.push_str(&render_results(results));
|
|
|
|
yield Ok(Bytes::from(second_part));
|
|
|
|
},
|
|
|
|
ProgressUpdateData::PostSearchInfobox(infobox) => {
|
|
|
|
third_part.push_str(&format!(
|
|
|
|
r#"<div class="infobox postsearch-infobox">{infobox_html}{engines_html}</div>"#,
|
|
|
|
infobox_html = &infobox.html,
|
|
|
|
engines_html = render_engine_list(&[infobox.engine])
|
|
|
|
));
|
|
|
|
}
|
|
|
|
}
|
2023-12-20 06:18:09 +00:00
|
|
|
}
|
|
|
|
|
2023-12-21 04:18:43 +00:00
|
|
|
if let Err(e) = search_future.await? {
|
|
|
|
let error_html = format!(
|
|
|
|
r#"<h1>Error: {}</p>"#,
|
|
|
|
encode_text(&e.to_string())
|
|
|
|
);
|
|
|
|
yield R::Ok(Bytes::from(error_html));
|
|
|
|
return;
|
2023-12-20 06:18:09 +00:00
|
|
|
};
|
|
|
|
|
2023-12-21 04:18:43 +00:00
|
|
|
third_part.push_str(&render_end_of_html());
|
2023-12-20 06:18:09 +00:00
|
|
|
|
2023-12-21 04:18:43 +00:00
|
|
|
yield Ok(Bytes::from(third_part));
|
2023-12-20 06:18:09 +00:00
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
let stream = Body::from_stream(s);
|
|
|
|
|
|
|
|
(
|
|
|
|
StatusCode::OK,
|
|
|
|
[
|
|
|
|
(header::CONTENT_TYPE, "text/html; charset=utf-8"),
|
|
|
|
(header::TRANSFER_ENCODING, "chunked"),
|
|
|
|
],
|
|
|
|
stream,
|
|
|
|
)
|
|
|
|
}
|