add wikipedia and stackoverflow engines
This commit is contained in:
parent
d496f3768d
commit
f95c5fe273
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -805,6 +805,7 @@ dependencies = [
|
||||
"regex",
|
||||
"reqwest",
|
||||
"scraper",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
|
@ -21,6 +21,7 @@ reqwest = { version = "0.11.23", default-features = false, features = [
|
||||
"rustls-tls",
|
||||
] }
|
||||
scraper = "0.18.1"
|
||||
serde = { version = "1.0.193", features = ["derive"] }
|
||||
serde_json = "1.0.108"
|
||||
tokio = { version = "1.35.0", features = ["full"] }
|
||||
tokio-stream = "0.1.14"
|
||||
|
4
README
4
README
@ -9,5 +9,5 @@ metasearch is a single binary with no cli or configuration file. if you want to
|
||||
configure it (like to change the default port or weights of engines) then you
|
||||
have to modify the source.
|
||||
|
||||
build it with `cargo b -r`, the resulting binary will be in `target/release/metasearch2`.
|
||||
it runs on port 28019.
|
||||
build it with `cargo b -r`, the resulting binary will be in
|
||||
`target/release/metasearch2`. it runs on port 28019.
|
||||
|
@ -1,6 +1,7 @@
|
||||
pub mod calc;
|
||||
pub mod ip;
|
||||
pub mod useragent;
|
||||
pub mod wikipedia;
|
||||
|
||||
macro_rules! regex {
|
||||
($re:literal $(,)?) => {{
|
||||
|
@ -1,8 +1,6 @@
|
||||
use crate::engines::{EngineResponse, SearchQuery};
|
||||
|
||||
pub fn request(query: &SearchQuery) -> EngineResponse {
|
||||
let query = query.query.as_str();
|
||||
use crate::engines::EngineResponse;
|
||||
|
||||
pub fn request(query: &str) -> EngineResponse {
|
||||
let Some(result_html) = evaluate(query, true) else {
|
||||
return EngineResponse::new();
|
||||
};
|
||||
|
96
src/engines/answer/wikipedia.rs
Normal file
96
src/engines/answer/wikipedia.rs
Normal file
@ -0,0 +1,96 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use reqwest::Url;
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::engines::{EngineResponse, CLIENT};
|
||||
|
||||
pub fn request(query: &str) -> reqwest::RequestBuilder {
|
||||
println!("request wikipedia");
|
||||
CLIENT
|
||||
.get(
|
||||
Url::parse_with_params(
|
||||
"https://en.wikipedia.org/w/api.php",
|
||||
&[
|
||||
("format", "json"),
|
||||
("action", "query"),
|
||||
("prop", "extracts|pageimages"),
|
||||
("exintro", ""),
|
||||
("explaintext", ""),
|
||||
("redirects", "1"),
|
||||
("exsentences", "2"),
|
||||
("titles", query),
|
||||
],
|
||||
)
|
||||
.unwrap(),
|
||||
)
|
||||
.header(
|
||||
"User-Agent",
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
|
||||
)
|
||||
.header("Accept-Language", "en-US,en;q=0.5")
|
||||
}
|
||||
|
||||
/// Top-level shape of the JSON body that `parse_response` deserializes.
#[derive(Debug, Deserialize)]
|
||||
pub struct WikipediaResponse {
|
||||
// deserialized but not read by `parse_response`
pub batchcomplete: String,
|
||||
// holds the returned pages
pub query: WikipediaQuery,
|
||||
}
|
||||
|
||||
/// The `query` object of a [`WikipediaResponse`].
#[derive(Debug, Deserialize)]
|
||||
pub struct WikipediaQuery {
|
||||
// pages keyed by a string id; `parse_response` treats the key "-1" as "no result"
pub pages: HashMap<String, WikipediaPage>,
|
||||
}
|
||||
|
||||
/// A single page entry inside [`WikipediaQuery::pages`].
///
/// All non-optional fields must be present in the JSON for deserialization to
/// succeed; `parse_response` only reads `title` and `extract`.
#[derive(Debug, Deserialize)]
|
||||
pub struct WikipediaPage {
|
||||
pub pageid: u64,
|
||||
pub ns: u64,
|
||||
// used for the infobox heading and to build the article link
pub title: String,
|
||||
// text shown in the infobox body
pub extract: String,
|
||||
// optional; currently ignored by `parse_response`
pub thumbnail: Option<WikipediaThumbnail>,
|
||||
}
|
||||
|
||||
/// Shape of the optional `thumbnail` object on a [`WikipediaPage`].
///
/// NOTE(review): presumably populated because `request` asks for the
/// `pageimages` prop — confirm against the API documentation.
#[derive(Debug, Deserialize)]
|
||||
pub struct WikipediaThumbnail {
|
||||
pub source: String,
|
||||
pub width: u64,
|
||||
pub height: u64,
|
||||
}
|
||||
|
||||
pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
|
||||
let Ok(res) = serde_json::from_str::<WikipediaResponse>(body) else {
|
||||
return Ok(EngineResponse::new());
|
||||
};
|
||||
|
||||
let pages: Vec<(String, WikipediaPage)> = res.query.pages.into_iter().collect();
|
||||
|
||||
if pages.is_empty() || pages[0].0 == "-1" {
|
||||
return Ok(EngineResponse::new());
|
||||
}
|
||||
|
||||
let page = &pages[0].1;
|
||||
let WikipediaPage {
|
||||
pageid: _,
|
||||
ns: _,
|
||||
title,
|
||||
extract,
|
||||
thumbnail: _,
|
||||
} = page;
|
||||
if extract.ends_with(":") {
|
||||
return Ok(EngineResponse::new());
|
||||
}
|
||||
|
||||
// this is present on the wikipedia article for google
|
||||
let extract = extract.replace("( )", "");
|
||||
|
||||
let page_title = title.replace(" ", "_");
|
||||
let page_url = format!("https://en.wikipedia.org/wiki/{page_title}");
|
||||
|
||||
Ok(EngineResponse::infobox_html(format!(
|
||||
r#"<a href="{page_url}"><h2>{title}</h2></a><p>{extract}</p>"#,
|
||||
page_url = html_escape::encode_quoted_attribute(&page_url),
|
||||
title = html_escape::encode_text(title),
|
||||
extract = html_escape::encode_text(&extract),
|
||||
)))
|
||||
}
|
@ -1,5 +1,6 @@
|
||||
use std::{
|
||||
collections::{BTreeSet, HashMap},
|
||||
fmt,
|
||||
net::IpAddr,
|
||||
ops::Deref,
|
||||
str::FromStr,
|
||||
@ -11,6 +12,7 @@ use futures::future::join_all;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
pub mod answer;
|
||||
pub mod postsearch;
|
||||
pub mod search;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
@ -23,6 +25,9 @@ pub enum Engine {
|
||||
Useragent,
|
||||
Ip,
|
||||
Calc,
|
||||
Wikipedia,
|
||||
// post-search
|
||||
StackOverflow,
|
||||
}
|
||||
|
||||
impl Engine {
|
||||
@ -34,6 +39,8 @@ impl Engine {
|
||||
Engine::Useragent,
|
||||
Engine::Ip,
|
||||
Engine::Calc,
|
||||
Engine::Wikipedia,
|
||||
Engine::StackOverflow,
|
||||
]
|
||||
}
|
||||
|
||||
@ -45,6 +52,8 @@ impl Engine {
|
||||
Engine::Useragent => "useragent",
|
||||
Engine::Ip => "ip",
|
||||
Engine::Calc => "calc",
|
||||
Engine::Wikipedia => "wikipedia",
|
||||
Engine::StackOverflow => "stackoverflow",
|
||||
}
|
||||
}
|
||||
|
||||
@ -65,6 +74,8 @@ impl Engine {
|
||||
Engine::Useragent => answer::useragent::request(query).into(),
|
||||
Engine::Ip => answer::ip::request(query).into(),
|
||||
Engine::Calc => answer::calc::request(query).into(),
|
||||
Engine::Wikipedia => answer::wikipedia::request(query).into(),
|
||||
_ => RequestResponse::None,
|
||||
}
|
||||
}
|
||||
|
||||
@ -73,6 +84,7 @@ impl Engine {
|
||||
Engine::Google => search::google::parse_response(body),
|
||||
Engine::Bing => search::bing::parse_response(body),
|
||||
Engine::Brave => search::brave::parse_response(body),
|
||||
Engine::Wikipedia => answer::wikipedia::parse_response(body),
|
||||
_ => eyre::bail!("engine {self:?} can't parse response"),
|
||||
}
|
||||
}
|
||||
@ -91,6 +103,26 @@ impl Engine {
|
||||
_ => eyre::bail!("engine {self:?} can't parse autocomplete response"),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn postsearch_request(&self, response: &Response) -> Option<reqwest::RequestBuilder> {
|
||||
match self {
|
||||
Engine::StackOverflow => postsearch::stackoverflow::request(response),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn postsearch_parse_response(&self, body: &str) -> Option<String> {
|
||||
match self {
|
||||
Engine::StackOverflow => postsearch::stackoverflow::parse_response(body),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Engine {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{}", self.id())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SearchQuery {
|
||||
@ -108,6 +140,7 @@ impl Deref for SearchQuery {
|
||||
}
|
||||
|
||||
/// What an engine produces when it is asked to handle a query.
pub enum RequestResponse {
|
||||
// the engine has nothing to do; `search_with_engines` turns this into an empty `EngineResponse`
None,
|
||||
// a prepared HTTP request; it is sent and the body is handed to `Engine::parse_response`
Http(reqwest::RequestBuilder),
|
||||
// a response that is already complete and is used as-is
Instant(EngineResponse),
|
||||
}
|
||||
@ -156,6 +189,7 @@ pub struct EngineResponse {
|
||||
pub search_results: Vec<EngineSearchResult>,
|
||||
pub featured_snippet: Option<EngineFeaturedSnippet>,
|
||||
pub answer_html: Option<String>,
|
||||
pub infobox_html: Option<String>,
|
||||
}
|
||||
|
||||
impl EngineResponse {
|
||||
@ -169,29 +203,44 @@ impl EngineResponse {
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn infobox_html(html: String) -> Self {
|
||||
Self {
|
||||
infobox_html: Some(html),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ProgressUpdateKind {
|
||||
pub enum EngineProgressUpdate {
|
||||
Requesting,
|
||||
Downloading,
|
||||
Parsing,
|
||||
Done,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ProgressUpdateData {
|
||||
Engine {
|
||||
engine: Engine,
|
||||
update: EngineProgressUpdate,
|
||||
},
|
||||
Response(Response),
|
||||
PostSearchInfobox(Infobox),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ProgressUpdate {
|
||||
pub kind: ProgressUpdateKind,
|
||||
pub engine: Engine,
|
||||
pub time: u64,
|
||||
pub data: ProgressUpdateData,
|
||||
pub time_ms: u64,
|
||||
}
|
||||
|
||||
impl ProgressUpdate {
|
||||
pub fn new(kind: ProgressUpdateKind, engine: Engine, start_time: Instant) -> Self {
|
||||
pub fn new(data: ProgressUpdateData, start_time: Instant) -> Self {
|
||||
Self {
|
||||
kind,
|
||||
engine,
|
||||
time: start_time.elapsed().as_millis() as u64,
|
||||
data,
|
||||
time_ms: start_time.elapsed().as_millis() as u64,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -200,7 +249,7 @@ pub async fn search_with_engines(
|
||||
engines: &[Engine],
|
||||
query: &SearchQuery,
|
||||
progress_tx: mpsc::UnboundedSender<ProgressUpdate>,
|
||||
) -> eyre::Result<Response> {
|
||||
) -> eyre::Result<()> {
|
||||
let start_time = Instant::now();
|
||||
|
||||
let mut requests = Vec::new();
|
||||
@ -213,38 +262,47 @@ pub async fn search_with_engines(
|
||||
let response = match request_response {
|
||||
RequestResponse::Http(request) => {
|
||||
progress_tx.send(ProgressUpdate::new(
|
||||
ProgressUpdateKind::Requesting,
|
||||
ProgressUpdateData::Engine {
|
||||
engine,
|
||||
update: EngineProgressUpdate::Requesting,
|
||||
},
|
||||
start_time,
|
||||
))?;
|
||||
|
||||
let res = request.send().await?;
|
||||
|
||||
progress_tx.send(ProgressUpdate::new(
|
||||
ProgressUpdateKind::Downloading,
|
||||
ProgressUpdateData::Engine {
|
||||
engine,
|
||||
update: EngineProgressUpdate::Downloading,
|
||||
},
|
||||
start_time,
|
||||
))?;
|
||||
|
||||
let body = res.text().await?;
|
||||
|
||||
progress_tx.send(ProgressUpdate::new(
|
||||
ProgressUpdateKind::Parsing,
|
||||
ProgressUpdateData::Engine {
|
||||
engine,
|
||||
update: EngineProgressUpdate::Parsing,
|
||||
},
|
||||
start_time,
|
||||
))?;
|
||||
|
||||
let response = engine.parse_response(&body)?;
|
||||
|
||||
progress_tx.send(ProgressUpdate::new(
|
||||
ProgressUpdateKind::Done,
|
||||
ProgressUpdateData::Engine {
|
||||
engine,
|
||||
update: EngineProgressUpdate::Done,
|
||||
},
|
||||
start_time,
|
||||
))?;
|
||||
|
||||
response
|
||||
}
|
||||
RequestResponse::Instant(response) => response,
|
||||
RequestResponse::None => EngineResponse::new(),
|
||||
};
|
||||
|
||||
Ok((engine, response))
|
||||
@ -260,7 +318,60 @@ pub async fn search_with_engines(
|
||||
join_all(response_futures).await.into_iter().collect();
|
||||
let responses = responses_result?;
|
||||
|
||||
Ok(merge_engine_responses(responses))
|
||||
let response = merge_engine_responses(responses);
|
||||
|
||||
let has_infobox = response.infobox.is_some();
|
||||
|
||||
progress_tx.send(ProgressUpdate::new(
|
||||
ProgressUpdateData::Response(response.clone()),
|
||||
start_time,
|
||||
))?;
|
||||
|
||||
if !has_infobox {
|
||||
// post-search
|
||||
|
||||
let mut postsearch_requests = Vec::new();
|
||||
for engine in engines {
|
||||
if let Some(request) = engine.postsearch_request(&response) {
|
||||
postsearch_requests.push(async {
|
||||
let response = match request.send().await {
|
||||
Ok(res) => {
|
||||
let body = res.text().await?;
|
||||
engine.postsearch_parse_response(&body)
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("postsearch request error: {}", e);
|
||||
None
|
||||
}
|
||||
};
|
||||
Ok((*engine, response))
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let mut postsearch_response_futures = Vec::new();
|
||||
for request in postsearch_requests {
|
||||
postsearch_response_futures.push(request);
|
||||
}
|
||||
|
||||
let postsearch_responses_result: eyre::Result<HashMap<_, _>> =
|
||||
join_all(postsearch_response_futures)
|
||||
.await
|
||||
.into_iter()
|
||||
.collect();
|
||||
let postsearch_responses = postsearch_responses_result?;
|
||||
|
||||
for (engine, response) in postsearch_responses {
|
||||
if let Some(html) = response {
|
||||
progress_tx.send(ProgressUpdate::new(
|
||||
ProgressUpdateData::PostSearchInfobox(Infobox { html, engine }),
|
||||
start_time,
|
||||
))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn autocomplete_with_engines(
|
||||
@ -306,7 +417,7 @@ pub static CLIENT: LazyLock<reqwest::Client> = LazyLock::new(|| {
|
||||
pub async fn search(
|
||||
query: SearchQuery,
|
||||
progress_tx: mpsc::UnboundedSender<ProgressUpdate>,
|
||||
) -> eyre::Result<Response> {
|
||||
) -> eyre::Result<()> {
|
||||
let engines = Engine::all();
|
||||
search_with_engines(&engines, &query, progress_tx).await
|
||||
}
|
||||
@ -316,14 +427,15 @@ pub async fn autocomplete(query: &str) -> eyre::Result<Vec<String>> {
|
||||
autocomplete_with_engines(&engines, query).await
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Response {
|
||||
pub search_results: Vec<SearchResult>,
|
||||
pub featured_snippet: Option<FeaturedSnippet>,
|
||||
pub answer: Option<Answer>,
|
||||
pub infobox: Option<Infobox>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SearchResult {
|
||||
pub url: String,
|
||||
pub title: String,
|
||||
@ -332,7 +444,7 @@ pub struct SearchResult {
|
||||
pub score: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct FeaturedSnippet {
|
||||
pub url: String,
|
||||
pub title: String,
|
||||
@ -340,16 +452,23 @@ pub struct FeaturedSnippet {
|
||||
pub engine: Engine,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Answer {
|
||||
pub html: String,
|
||||
pub engine: Engine,
|
||||
}
|
||||
|
||||
/// An infobox selected for display next to the search results.
#[derive(Debug, Clone)]
|
||||
pub struct Infobox {
|
||||
// HTML fragment; `render_results` inserts it into the page without escaping it again
pub html: String,
|
||||
// the engine that produced `html`
pub engine: Engine,
|
||||
}
|
||||
|
||||
fn merge_engine_responses(responses: HashMap<Engine, EngineResponse>) -> Response {
|
||||
let mut search_results: Vec<SearchResult> = Vec::new();
|
||||
let mut featured_snippet: Option<FeaturedSnippet> = None;
|
||||
let mut answer: Option<Answer> = None;
|
||||
let mut infobox: Option<Infobox> = None;
|
||||
|
||||
for (engine, response) in responses {
|
||||
for (result_index, search_result) in response.search_results.into_iter().enumerate() {
|
||||
@ -413,6 +532,17 @@ fn merge_engine_responses(responses: HashMap<Engine, EngineResponse>) -> Respons
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(engine_infobox_html) = response.infobox_html {
|
||||
// if it has a higher weight than the current infobox
|
||||
let infobox_weight = infobox.as_ref().map(|s| s.engine.weight()).unwrap_or(0.);
|
||||
if engine.weight() > infobox_weight {
|
||||
infobox = Some(Infobox {
|
||||
html: engine_infobox_html,
|
||||
engine,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
search_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
|
||||
@ -421,6 +551,7 @@ fn merge_engine_responses(responses: HashMap<Engine, EngineResponse>) -> Respons
|
||||
search_results,
|
||||
featured_snippet,
|
||||
answer,
|
||||
infobox,
|
||||
}
|
||||
}
|
||||
|
||||
|
5
src/engines/postsearch.rs
Normal file
5
src/engines/postsearch.rs
Normal file
@ -0,0 +1,5 @@
|
||||
//! These search engines are requested after we've built the main search
|
||||
//! results. They can only show stuff in infoboxes and don't get requested if
|
||||
//! an infobox was added by another earlier engine.
|
||||
|
||||
pub mod stackoverflow;
|
57
src/engines/postsearch/stackoverflow.rs
Normal file
57
src/engines/postsearch/stackoverflow.rs
Normal file
@ -0,0 +1,57 @@
|
||||
use reqwest::Url;
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use crate::engines::{Response, CLIENT};
|
||||
|
||||
pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
|
||||
for search_result in response.search_results.iter().take(8) {
|
||||
if search_result
|
||||
.url
|
||||
.starts_with("https://stackoverflow.com/questions/")
|
||||
{
|
||||
return Some(CLIENT.get(search_result.url.as_str()).header(
|
||||
"User-Agent",
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
pub fn parse_response(body: &str) -> Option<String> {
|
||||
let dom = Html::parse_document(body);
|
||||
|
||||
let title = dom
|
||||
.select(&Selector::parse("h1").unwrap())
|
||||
.next()?
|
||||
.text()
|
||||
.collect::<String>();
|
||||
let url = Url::join(
|
||||
&Url::parse("https://stackoverflow.com").unwrap(),
|
||||
dom.select(&Selector::parse(".question-hyperlink").unwrap())
|
||||
.next()?
|
||||
.value()
|
||||
.attr("href")?,
|
||||
)
|
||||
.ok()?;
|
||||
|
||||
let answer_query = Selector::parse("div.answer.accepted-answer").unwrap();
|
||||
|
||||
let answer = dom.select(&answer_query).next()?;
|
||||
let answer_id = answer.value().attr("data-answerid")?;
|
||||
let answer_html = answer
|
||||
.select(&Selector::parse("div.answercell > div.js-post-body").unwrap())
|
||||
.next()?
|
||||
.html()
|
||||
.to_string();
|
||||
|
||||
let url = format!("{url}#{answer_id}");
|
||||
|
||||
Some(format!(
|
||||
r#"<a href="{url}" class="title"><h2>{title}</h2></a>
|
||||
<div class="infobox-stackoverflow-answer">{answer_html}</div>"#,
|
||||
url = html_escape::encode_quoted_attribute(&url.to_string()),
|
||||
title = html_escape::encode_text(&title),
|
||||
))
|
||||
}
|
@ -188,7 +188,8 @@ pub(super) fn parse_html_response_with_opts(
|
||||
Ok(EngineResponse {
|
||||
search_results,
|
||||
featured_snippet,
|
||||
// this field is used by instant answers, not normal search engines
|
||||
// these fields are used by instant answers, not normal search engines
|
||||
answer_html: None,
|
||||
infobox_html: None,
|
||||
})
|
||||
}
|
||||
|
@ -1,3 +1,4 @@
|
||||
<!-- source code: https://git.matdoes.dev/mat/metasearch2 -->
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
|
2
src/web/assets/robots.txt
Normal file
2
src/web/assets/robots.txt
Normal file
@ -0,0 +1,2 @@
|
||||
User-agent: *
|
||||
Disallow: /
|
@ -9,12 +9,24 @@ body {
|
||||
line-height: 1.2;
|
||||
height: 100%;
|
||||
}
|
||||
.results-container {
|
||||
/* enough space for the infobox */
|
||||
max-width: 73.5rem;
|
||||
margin: 0 auto;
|
||||
}
|
||||
main {
|
||||
max-width: 40rem;
|
||||
/* margin: 0 0 0 10rem; */
|
||||
padding: 1rem 0.5rem;
|
||||
margin: 0 auto;
|
||||
background-color: #0d1017;
|
||||
height: 100%;
|
||||
min-height: 100%;
|
||||
}
|
||||
@media screen and (max-width: 80rem) {
|
||||
/* small screens */
|
||||
.results-container {
|
||||
margin: 0 auto;
|
||||
max-width: 40rem;
|
||||
}
|
||||
}
|
||||
input {
|
||||
font-family: monospace;
|
||||
@ -27,11 +39,19 @@ input {
|
||||
input[type="submit"] {
|
||||
cursor: pointer;
|
||||
}
|
||||
a {
|
||||
color: #29e;
|
||||
text-decoration: none;
|
||||
}
|
||||
a:visited {
|
||||
color: #92e;
|
||||
}
|
||||
|
||||
/* index page */
|
||||
.main-container {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
min-height: 100%;
|
||||
height: 100%;
|
||||
justify-content: center;
|
||||
margin: 0 auto;
|
||||
@ -57,8 +77,6 @@ h1 {
|
||||
font-size: 1rem;
|
||||
}
|
||||
.search-result-anchor {
|
||||
color: inherit;
|
||||
text-decoration: none;
|
||||
display: block;
|
||||
}
|
||||
.search-result-url {
|
||||
@ -69,7 +87,6 @@ h1 {
|
||||
.search-result-title {
|
||||
margin: 0;
|
||||
font-size: 1rem;
|
||||
color: #29e;
|
||||
}
|
||||
.search-result-description {
|
||||
margin: 0;
|
||||
@ -106,7 +123,7 @@ h1 {
|
||||
}
|
||||
.progress-update {
|
||||
margin: 0;
|
||||
white-space: pre;
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
.progress-update-time {
|
||||
opacity: 0.5;
|
||||
@ -135,7 +152,7 @@ h1 {
|
||||
}
|
||||
.answer-calc-constant {
|
||||
color: #d2a6ff;
|
||||
white-space: pre;
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
.answer-calc-string {
|
||||
color: #aad94c;
|
||||
@ -143,3 +160,39 @@ h1 {
|
||||
.answer-calc-special {
|
||||
color: #e6b673;
|
||||
}
|
||||
|
||||
/* infobox */
|
||||
.infobox {
|
||||
margin-bottom: 1rem;
|
||||
border: 1px solid #234;
|
||||
padding: 0.5rem;
|
||||
position: absolute;
|
||||
top: 3.5rem;
|
||||
max-width: 30rem;
|
||||
margin-left: 42rem;
|
||||
}
|
||||
@media screen and (max-width: 80rem) {
|
||||
/* small screens */
|
||||
.infobox {
|
||||
position: static;
|
||||
margin: 0;
|
||||
max-width: unset;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.postsearch-infobox {
|
||||
/* displaying these properly is too hard so don't */
|
||||
display: none;
|
||||
}
|
||||
}
|
||||
.infobox h2 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
.infobox p {
|
||||
margin: 0;
|
||||
}
|
||||
.postsearch-infobox p {
|
||||
margin-bottom: 1em;
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
@ -37,6 +37,15 @@ pub async fn run() {
|
||||
)
|
||||
}),
|
||||
)
|
||||
.route(
|
||||
"/robots.txt",
|
||||
get(|| async {
|
||||
(
|
||||
[(header::CONTENT_TYPE, "text/plain; charset=utf-8")],
|
||||
include_str!("assets/robots.txt"),
|
||||
)
|
||||
}),
|
||||
)
|
||||
.route("/opensearch.xml", get(opensearch::route))
|
||||
.route("/search", get(search::route))
|
||||
.route("/autocomplete", get(autocomplete::route));
|
||||
|
@ -10,7 +10,9 @@ use axum::{
|
||||
use bytes::Bytes;
|
||||
use html_escape::{encode_text, encode_unquoted_attribute};
|
||||
|
||||
use crate::engines::{self, ProgressUpdate, ProgressUpdateKind, Response, SearchQuery};
|
||||
use crate::engines::{
|
||||
self, Engine, EngineProgressUpdate, ProgressUpdate, ProgressUpdateData, Response, SearchQuery,
|
||||
};
|
||||
|
||||
fn render_beginning_of_html(query: &str) -> String {
|
||||
format!(
|
||||
@ -25,6 +27,7 @@ fn render_beginning_of_html(query: &str) -> String {
|
||||
<link rel="search" type="application/opensearchdescription+xml" title="metasearch" href="/opensearch.xml"/>
|
||||
</head>
|
||||
<body>
|
||||
<div class="results-container">
|
||||
<main>
|
||||
<form action="/search" method="get" class="search-form">
|
||||
<input type="text" name="q" placeholder="Search" value="{}" id="search-input" autofocus onfocus="this.select()" autocomplete="off">
|
||||
@ -38,7 +41,7 @@ fn render_beginning_of_html(query: &str) -> String {
|
||||
}
|
||||
|
||||
fn render_end_of_html() -> String {
|
||||
r#"</main></body></html>"#.to_string()
|
||||
r#"</main></div></body></html>"#.to_string()
|
||||
}
|
||||
|
||||
fn render_engine_list(engines: &[engines::Engine]) -> String {
|
||||
@ -92,6 +95,14 @@ fn render_featured_snippet(featured_snippet: &engines::FeaturedSnippet) -> Strin
|
||||
|
||||
fn render_results(response: Response) -> String {
|
||||
let mut html = String::new();
|
||||
if let Some(infobox) = response.infobox {
|
||||
html.push_str(&format!(
|
||||
r#"<div class="infobox">{infobox_html}{engines_html}</div>"#,
|
||||
infobox_html = &infobox.html,
|
||||
engines_html = render_engine_list(&[infobox.engine])
|
||||
));
|
||||
}
|
||||
|
||||
if let Some(answer) = response.answer {
|
||||
html.push_str(&format!(
|
||||
r#"<div class="answer">{answer_html}{engines_html}</div>"#,
|
||||
@ -108,20 +119,19 @@ fn render_results(response: Response) -> String {
|
||||
html
|
||||
}
|
||||
|
||||
fn render_progress_update(progress_update: &ProgressUpdate) -> String {
|
||||
let message: &str = match progress_update.kind {
|
||||
ProgressUpdateKind::Requesting => "requesting",
|
||||
ProgressUpdateKind::Downloading => "downloading",
|
||||
ProgressUpdateKind::Parsing => "parsing",
|
||||
ProgressUpdateKind::Done => "<span class=\"progress-update-done\">done</span>",
|
||||
fn render_engine_progress_update(
|
||||
engine: Engine,
|
||||
progress_update: &EngineProgressUpdate,
|
||||
time_ms: u64,
|
||||
) -> String {
|
||||
let message = match progress_update {
|
||||
EngineProgressUpdate::Requesting => "requesting",
|
||||
EngineProgressUpdate::Downloading => "downloading",
|
||||
EngineProgressUpdate::Parsing => "parsing",
|
||||
EngineProgressUpdate::Done => "<span class=\"progress-update-done\">done</span>",
|
||||
};
|
||||
|
||||
format!(
|
||||
r#"<span class="progress-update-time">{time:>4}ms</span> {engine} {message}"#,
|
||||
time = progress_update.time,
|
||||
message = message,
|
||||
engine = progress_update.engine.id()
|
||||
)
|
||||
format!(r#"<span class="progress-update-time">{time_ms:>4}ms</span> {engine} {message}"#)
|
||||
}
|
||||
|
||||
pub async fn route(
|
||||
@ -170,40 +180,61 @@ pub async fn route(
|
||||
let s = stream! {
|
||||
type R = Result<Bytes, eyre::Error>;
|
||||
|
||||
yield R::Ok(Bytes::from(render_beginning_of_html(&query)));
|
||||
// the html is sent in three chunks (technically more if you count progress updates):
|
||||
// 1) the beginning of the html, including the search bar
|
||||
// 1.5) the progress updates
|
||||
// 2) the results
|
||||
// 3) the post-search infobox (usually not sent) + the end of the html
|
||||
|
||||
let first_part = render_beginning_of_html(&query);
|
||||
// second part is in the loop
|
||||
let mut third_part = String::new();
|
||||
|
||||
yield R::Ok(Bytes::from(first_part));
|
||||
|
||||
let (progress_tx, mut progress_rx) = tokio::sync::mpsc::unbounded_channel();
|
||||
|
||||
let search_future = tokio::spawn(async move { engines::search(query, progress_tx).await });
|
||||
|
||||
while let Some(progress_update) = progress_rx.recv().await {
|
||||
match progress_update.data {
|
||||
ProgressUpdateData::Engine { engine, update } => {
|
||||
let progress_html = format!(
|
||||
r#"<p class="progress-update">{}</p>"#,
|
||||
render_progress_update(&progress_update)
|
||||
render_engine_progress_update(engine, &update, progress_update.time_ms)
|
||||
);
|
||||
yield R::Ok(Bytes::from(progress_html));
|
||||
},
|
||||
ProgressUpdateData::Response(results) => {
|
||||
let mut second_part = String::new();
|
||||
|
||||
second_part.push_str("</div>"); // close progress-updates
|
||||
second_part.push_str("<style>.progress-updates{display:none}</style>");
|
||||
second_part.push_str(&render_results(results));
|
||||
yield Ok(Bytes::from(second_part));
|
||||
},
|
||||
ProgressUpdateData::PostSearchInfobox(infobox) => {
|
||||
third_part.push_str(&format!(
|
||||
r#"<div class="infobox postsearch-infobox">{infobox_html}{engines_html}</div>"#,
|
||||
infobox_html = &infobox.html,
|
||||
engines_html = render_engine_list(&[infobox.engine])
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let results = match search_future.await? {
|
||||
Ok(results) => results,
|
||||
Err(e) => {
|
||||
if let Err(e) = search_future.await? {
|
||||
// render the (escaped) error message as a heading; the closing tag must match the opening <h1>
let error_html = format!(
    r#"<h1>Error: {}</h1>"#,
    encode_text(&e.to_string())
);
|
||||
yield R::Ok(Bytes::from(error_html));
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let mut second_half = String::new();
|
||||
third_part.push_str(&render_end_of_html());
|
||||
|
||||
second_half.push_str("</div>"); // close progress-updates
|
||||
second_half.push_str("<style>.progress-updates{display:none}</style>");
|
||||
second_half.push_str(&render_results(results));
|
||||
second_half.push_str(&render_end_of_html());
|
||||
|
||||
yield Ok(Bytes::from(second_half));
|
||||
yield Ok(Bytes::from(third_part));
|
||||
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user