use featured snippets from google
This commit is contained in:
parent
da972bd45e
commit
aeac6f7c5d
7
Cargo.lock
generated
7
Cargo.lock
generated
@ -800,6 +800,7 @@ dependencies = [
|
|||||||
"tokio-stream",
|
"tokio-stream",
|
||||||
"tracing-subscriber",
|
"tracing-subscriber",
|
||||||
"url",
|
"url",
|
||||||
|
"urlencoding",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -1710,6 +1711,12 @@ dependencies = [
|
|||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "urlencoding"
|
||||||
|
version = "2.1.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "utf-8"
|
name = "utf-8"
|
||||||
version = "0.7.6"
|
version = "0.7.6"
|
||||||
|
@ -22,3 +22,4 @@ tokio = { version = "1.35.0", features = ["full"] }
|
|||||||
tokio-stream = "0.1.14"
|
tokio-stream = "0.1.14"
|
||||||
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
|
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
|
||||||
url = "2.5.0"
|
url = "2.5.0"
|
||||||
|
urlencoding = "2.1.3"
|
||||||
|
@ -64,8 +64,17 @@ pub struct EngineSearchResult {
|
|||||||
pub description: String,
|
pub description: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct EngineFeaturedSnippet {
|
||||||
|
pub url: String,
|
||||||
|
pub title: String,
|
||||||
|
pub description: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
pub struct EngineResponse {
|
pub struct EngineResponse {
|
||||||
pub search_results: Vec<EngineSearchResult>,
|
pub search_results: Vec<EngineSearchResult>,
|
||||||
|
pub featured_snippet: Option<EngineFeaturedSnippet>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@ -80,7 +89,7 @@ pub enum ProgressUpdateKind {
|
|||||||
pub struct ProgressUpdate {
|
pub struct ProgressUpdate {
|
||||||
pub kind: ProgressUpdateKind,
|
pub kind: ProgressUpdateKind,
|
||||||
pub engine: Engine,
|
pub engine: Engine,
|
||||||
pub time: f64,
|
pub time: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ProgressUpdate {
|
impl ProgressUpdate {
|
||||||
@ -88,7 +97,7 @@ impl ProgressUpdate {
|
|||||||
Self {
|
Self {
|
||||||
kind,
|
kind,
|
||||||
engine,
|
engine,
|
||||||
time: start_time.elapsed().as_secs_f64(),
|
time: start_time.elapsed().as_millis() as u64,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -96,15 +105,15 @@ impl ProgressUpdate {
|
|||||||
impl fmt::Display for ProgressUpdate {
|
impl fmt::Display for ProgressUpdate {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
let message = match self.kind {
|
let message = match self.kind {
|
||||||
ProgressUpdateKind::Requesting => "Requesting",
|
ProgressUpdateKind::Requesting => "requesting",
|
||||||
ProgressUpdateKind::Downloading => "Downloading",
|
ProgressUpdateKind::Downloading => "downloading",
|
||||||
ProgressUpdateKind::Parsing => "Parsing",
|
ProgressUpdateKind::Parsing => "parsing",
|
||||||
ProgressUpdateKind::Done => "Done",
|
ProgressUpdateKind::Done => "<b>done</b>",
|
||||||
};
|
};
|
||||||
|
|
||||||
write!(
|
write!(
|
||||||
f,
|
f,
|
||||||
"{time:.3}s {message} {engine}",
|
r#"<span class="progress-update-time">{time:>4}ms</span> {engine} {message}"#,
|
||||||
time = self.time,
|
time = self.time,
|
||||||
message = message,
|
message = message,
|
||||||
engine = self.engine.name()
|
engine = self.engine.name()
|
||||||
@ -183,7 +192,9 @@ pub async fn search(
|
|||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Response {
|
pub struct Response {
|
||||||
pub search_results: Vec<SearchResult>,
|
pub search_results: Vec<SearchResult>,
|
||||||
|
pub featured_snippet: Option<FeaturedSnippet>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct SearchResult {
|
pub struct SearchResult {
|
||||||
pub url: String,
|
pub url: String,
|
||||||
@ -193,8 +204,18 @@ pub struct SearchResult {
|
|||||||
pub score: f64,
|
pub score: f64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct FeaturedSnippet {
|
||||||
|
pub url: String,
|
||||||
|
pub title: String,
|
||||||
|
pub description: String,
|
||||||
|
pub engine: Engine,
|
||||||
|
}
|
||||||
|
|
||||||
fn merge_engine_responses(responses: HashMap<Engine, EngineResponse>) -> Response {
|
fn merge_engine_responses(responses: HashMap<Engine, EngineResponse>) -> Response {
|
||||||
let mut search_results: Vec<SearchResult> = Vec::new();
|
let mut search_results: Vec<SearchResult> = Vec::new();
|
||||||
|
let mut featured_snippet: Option<FeaturedSnippet> = None;
|
||||||
|
|
||||||
for (engine, response) in responses {
|
for (engine, response) in responses {
|
||||||
for (result_index, search_result) in response.search_results.into_iter().enumerate() {
|
for (result_index, search_result) in response.search_results.into_iter().enumerate() {
|
||||||
// position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a score of 0.33, etc.
|
// position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a score of 0.33, etc.
|
||||||
@ -230,9 +251,28 @@ fn merge_engine_responses(responses: HashMap<Engine, EngineResponse>) -> Respons
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(engine_featured_snippet) = response.featured_snippet {
|
||||||
|
// if it has a higher weight than the current featured snippet
|
||||||
|
let featured_snippet_weight = featured_snippet
|
||||||
|
.as_ref()
|
||||||
|
.map(|s| s.engine.weight())
|
||||||
|
.unwrap_or(0.);
|
||||||
|
if engine.weight() > featured_snippet_weight {
|
||||||
|
featured_snippet = Some(FeaturedSnippet {
|
||||||
|
url: engine_featured_snippet.url,
|
||||||
|
title: engine_featured_snippet.title,
|
||||||
|
description: engine_featured_snippet.description,
|
||||||
|
engine,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
search_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
|
search_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
|
||||||
|
|
||||||
Response { search_results }
|
Response {
|
||||||
|
search_results,
|
||||||
|
featured_snippet,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -27,16 +27,22 @@ pub fn request(client: &reqwest::Client, query: &str) -> reqwest::RequestBuilder
|
|||||||
pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
|
pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
|
||||||
parse_html_response_with_opts(
|
parse_html_response_with_opts(
|
||||||
body,
|
body,
|
||||||
ParseOpts {
|
ParseOpts::new()
|
||||||
result_item: "#b_results > li.b_algo",
|
.result("#b_results > li.b_algo")
|
||||||
title: ".b_algo h2 > a",
|
.title(".b_algo h2 > a")
|
||||||
href: QueryMethod::Manual(Box::new(|el: &ElementRef| {
|
.href(QueryMethod::Manual(Box::new(|el: &ElementRef| {
|
||||||
let url = el
|
let url = el
|
||||||
.select(&Selector::parse("a").unwrap())
|
.select(&Selector::parse("a[href]").unwrap())
|
||||||
.next()
|
.next()
|
||||||
.and_then(|n| n.value().attr("href"))
|
.and_then(|n| n.value().attr("href"))
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
|
clean_url(url)
|
||||||
|
})))
|
||||||
|
.description(".b_caption > p, p.b_algoSlug"),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn clean_url(url: &str) -> eyre::Result<String> {
|
||||||
// clean up bing's tracking urls
|
// clean up bing's tracking urls
|
||||||
if url.starts_with("https://www.bing.com/ck/a?") {
|
if url.starts_with("https://www.bing.com/ck/a?") {
|
||||||
// get the u param
|
// get the u param
|
||||||
@ -50,13 +56,9 @@ pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
|
|||||||
let u = base64::engine::general_purpose::URL_SAFE_NO_PAD
|
let u = base64::engine::general_purpose::URL_SAFE_NO_PAD
|
||||||
.decode(&u[2..])
|
.decode(&u[2..])
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
// now normalize that one instead
|
// convert to utf8
|
||||||
Ok(String::from_utf8_lossy(&u).to_string())
|
Ok(String::from_utf8_lossy(&u).to_string())
|
||||||
} else {
|
} else {
|
||||||
Ok(url.to_string())
|
Ok(url.to_string())
|
||||||
}
|
}
|
||||||
})),
|
|
||||||
description: ".b_caption > p, p.b_algoSlug",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
@ -18,11 +18,10 @@ pub fn request(client: &reqwest::Client, query: &str) -> reqwest::RequestBuilder
|
|||||||
pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
|
pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
|
||||||
parse_html_response_with_opts(
|
parse_html_response_with_opts(
|
||||||
body,
|
body,
|
||||||
ParseOpts {
|
ParseOpts::new()
|
||||||
result_item: "#results > .snippet[data-pos]:not(.standalone)",
|
.result("#results > .snippet[data-pos]:not(.standalone)")
|
||||||
title: ".url",
|
.title(".url")
|
||||||
href: "a",
|
.href("a")
|
||||||
description: ".snippet-content",
|
.description(".snippet-content"),
|
||||||
},
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
use reqwest::Url;
|
use reqwest::Url;
|
||||||
|
use scraper::{ElementRef, Selector};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
engines::EngineResponse,
|
engines::EngineResponse,
|
||||||
parse::{parse_html_response_with_opts, ParseOpts},
|
parse::{parse_html_response_with_opts, ParseOpts, QueryMethod},
|
||||||
};
|
};
|
||||||
|
|
||||||
pub fn request(client: &reqwest::Client, query: &str) -> reqwest::RequestBuilder {
|
pub fn request(client: &reqwest::Client, query: &str) -> reqwest::RequestBuilder {
|
||||||
@ -25,11 +26,36 @@ pub fn request(client: &reqwest::Client, query: &str) -> reqwest::RequestBuilder
|
|||||||
pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
|
pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
|
||||||
parse_html_response_with_opts(
|
parse_html_response_with_opts(
|
||||||
body,
|
body,
|
||||||
ParseOpts {
|
ParseOpts::new()
|
||||||
result_item: "div.g, div.xpd",
|
.result("div.g, div.xpd")
|
||||||
title: "h3",
|
.title("h3")
|
||||||
href: "a",
|
.href("a[href]")
|
||||||
description: "div[data-sncf], div[style='-webkit-line-clamp:2']",
|
.description("div[data-sncf], div[style='-webkit-line-clamp:2']")
|
||||||
},
|
.featured_snippet("block-component")
|
||||||
|
.featured_snippet_description("div[data-attrid='wa:/description'] > span:first-child")
|
||||||
|
.featured_snippet_title("h3")
|
||||||
|
.featured_snippet_href(QueryMethod::Manual(Box::new(|el: &ElementRef| {
|
||||||
|
let url = el
|
||||||
|
.select(&Selector::parse("a").unwrap())
|
||||||
|
.next()
|
||||||
|
.and_then(|n| n.value().attr("href"))
|
||||||
|
.unwrap_or_default();
|
||||||
|
clean_url(url)
|
||||||
|
}))),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn clean_url(url: &str) -> eyre::Result<String> {
|
||||||
|
if url.starts_with("/url?q=") {
|
||||||
|
// get the q param
|
||||||
|
let url = Url::parse(format!("https://www.google.com{url}").as_str())?;
|
||||||
|
let q = url
|
||||||
|
.query_pairs()
|
||||||
|
.find(|(key, _)| key == "q")
|
||||||
|
.unwrap_or_default()
|
||||||
|
.1;
|
||||||
|
Ok(q.to_string())
|
||||||
|
} else {
|
||||||
|
Ok(url.to_string())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -1,6 +1,10 @@
|
|||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
pub fn normalize_url(url: &str) -> eyre::Result<String> {
|
pub fn normalize_url(url: &str) -> eyre::Result<String> {
|
||||||
|
if url.is_empty() {
|
||||||
|
return Ok(String::new());
|
||||||
|
}
|
||||||
|
|
||||||
let mut url = Url::parse(url)?;
|
let mut url = Url::parse(url)?;
|
||||||
|
|
||||||
// make sure the scheme is https
|
// make sure the scheme is https
|
||||||
@ -32,5 +36,18 @@ pub fn normalize_url(url: &str) -> eyre::Result<String> {
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
return Ok(url.to_string());
|
// url decode and encode path
|
||||||
|
let path = url.path().to_string();
|
||||||
|
let path = urlencoding::decode(&path)?;
|
||||||
|
url.set_path(&path.to_string());
|
||||||
|
|
||||||
|
let url = url.to_string();
|
||||||
|
// remove trailing slash
|
||||||
|
let url = if let Some(url) = url.strip_suffix('/') {
|
||||||
|
url.to_string()
|
||||||
|
} else {
|
||||||
|
url
|
||||||
|
};
|
||||||
|
|
||||||
|
return Ok(url);
|
||||||
}
|
}
|
||||||
|
182
src/parse.rs
182
src/parse.rs
@ -1,25 +1,81 @@
|
|||||||
//! Helper functions for parsing search engine responses.
|
//! Helper functions for parsing search engine responses.
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
engines::{EngineResponse, EngineSearchResult},
|
engines::{EngineFeaturedSnippet, EngineResponse, EngineSearchResult},
|
||||||
normalize::normalize_url,
|
normalize::normalize_url,
|
||||||
};
|
};
|
||||||
|
|
||||||
use scraper::{Html, Selector};
|
use scraper::{Html, Selector};
|
||||||
|
|
||||||
pub struct ParseOpts<A, B, C>
|
#[derive(Default)]
|
||||||
where
|
pub struct ParseOpts {
|
||||||
A: Into<QueryMethod>,
|
result: &'static str,
|
||||||
B: Into<QueryMethod>,
|
title: QueryMethod,
|
||||||
C: Into<QueryMethod>,
|
href: QueryMethod,
|
||||||
{
|
description: QueryMethod,
|
||||||
pub result_item: &'static str,
|
|
||||||
pub title: A,
|
featured_snippet: &'static str,
|
||||||
pub href: B,
|
featured_snippet_title: QueryMethod,
|
||||||
pub description: C,
|
featured_snippet_href: QueryMethod,
|
||||||
|
featured_snippet_description: QueryMethod,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl ParseOpts {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self::default()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn result(mut self, result: &'static str) -> Self {
|
||||||
|
self.result = result;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn title(mut self, title: impl Into<QueryMethod>) -> Self {
|
||||||
|
self.title = title.into();
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn href(mut self, href: impl Into<QueryMethod>) -> Self {
|
||||||
|
self.href = href.into();
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn description(mut self, description: impl Into<QueryMethod>) -> Self {
|
||||||
|
self.description = description.into();
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn featured_snippet(mut self, featured_snippet: &'static str) -> Self {
|
||||||
|
self.featured_snippet = featured_snippet;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn featured_snippet_title(
|
||||||
|
mut self,
|
||||||
|
featured_snippet_title: impl Into<QueryMethod>,
|
||||||
|
) -> Self {
|
||||||
|
self.featured_snippet_title = featured_snippet_title.into();
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn featured_snippet_href(mut self, featured_snippet_href: impl Into<QueryMethod>) -> Self {
|
||||||
|
self.featured_snippet_href = featured_snippet_href.into();
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn featured_snippet_description(
|
||||||
|
mut self,
|
||||||
|
featured_snippet_description: impl Into<QueryMethod>,
|
||||||
|
) -> Self {
|
||||||
|
self.featured_snippet_description = featured_snippet_description.into();
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
pub enum QueryMethod {
|
pub enum QueryMethod {
|
||||||
|
#[default]
|
||||||
|
None,
|
||||||
CssSelector(&'static str),
|
CssSelector(&'static str),
|
||||||
Manual(Box<dyn Fn(&scraper::ElementRef) -> eyre::Result<String>>),
|
Manual(Box<dyn Fn(&scraper::ElementRef) -> eyre::Result<String>>),
|
||||||
}
|
}
|
||||||
@ -30,66 +86,63 @@ impl From<&'static str> for QueryMethod {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn parse_html_response_with_opts<A, B, C>(
|
impl QueryMethod {
|
||||||
|
pub fn call_with_css_selector_override(
|
||||||
|
&self,
|
||||||
|
el: &scraper::ElementRef,
|
||||||
|
with_css_selector: impl Fn(&scraper::ElementRef, &'static str) -> Option<String>,
|
||||||
|
) -> eyre::Result<String> {
|
||||||
|
match self {
|
||||||
|
QueryMethod::None => Ok(String::new()),
|
||||||
|
QueryMethod::CssSelector(s) => Ok(with_css_selector(el, s).unwrap_or_default()),
|
||||||
|
QueryMethod::Manual(f) => f(el),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn call(&self, el: &scraper::ElementRef) -> eyre::Result<String> {
|
||||||
|
self.call_with_css_selector_override(el, |el, s| {
|
||||||
|
el.select(&Selector::parse(s).unwrap())
|
||||||
|
.next()
|
||||||
|
.map(|n| n.text().collect::<String>())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(super) fn parse_html_response_with_opts(
|
||||||
body: &str,
|
body: &str,
|
||||||
opts: ParseOpts<A, B, C>,
|
opts: ParseOpts,
|
||||||
) -> eyre::Result<EngineResponse>
|
) -> eyre::Result<EngineResponse> {
|
||||||
where
|
|
||||||
A: Into<QueryMethod>,
|
|
||||||
B: Into<QueryMethod>,
|
|
||||||
C: Into<QueryMethod>,
|
|
||||||
{
|
|
||||||
let dom = Html::parse_document(body);
|
let dom = Html::parse_document(body);
|
||||||
|
|
||||||
let mut search_results = Vec::new();
|
let mut search_results = Vec::new();
|
||||||
|
|
||||||
let ParseOpts {
|
let ParseOpts {
|
||||||
result_item: result_item_query,
|
result: result_item_query,
|
||||||
title: title_query_method,
|
title: title_query_method,
|
||||||
href: href_query_method,
|
href: href_query_method,
|
||||||
description: description_query_method,
|
description: description_query_method,
|
||||||
|
featured_snippet: featured_snippet_query,
|
||||||
|
featured_snippet_title: featured_snippet_title_query_method,
|
||||||
|
featured_snippet_href: featured_snippet_href_query_method,
|
||||||
|
featured_snippet_description: featured_snippet_description_query_method,
|
||||||
} = opts;
|
} = opts;
|
||||||
let title_query_method = title_query_method.into();
|
|
||||||
let href_query_method = href_query_method.into();
|
|
||||||
let description_query_method = description_query_method.into();
|
|
||||||
|
|
||||||
let result_item_query = Selector::parse(result_item_query).unwrap();
|
let result_item_query = Selector::parse(result_item_query).unwrap();
|
||||||
|
|
||||||
let result_items = dom.select(&result_item_query);
|
let results = dom.select(&result_item_query);
|
||||||
|
|
||||||
for result_item in result_items {
|
for result in results {
|
||||||
let title = match title_query_method {
|
let title = title_query_method.call(&result)?;
|
||||||
QueryMethod::CssSelector(s) => result_item
|
let url = href_query_method.call_with_css_selector_override(&result, |el, s| {
|
||||||
.select(&Selector::parse(s).unwrap())
|
el.select(&Selector::parse(s).unwrap()).next().map(|n| {
|
||||||
.next()
|
|
||||||
.map(|n| n.text().collect::<String>())
|
|
||||||
.unwrap_or_default(),
|
|
||||||
QueryMethod::Manual(ref f) => f(&result_item)?,
|
|
||||||
};
|
|
||||||
|
|
||||||
let url = match href_query_method {
|
|
||||||
QueryMethod::CssSelector(s) => result_item
|
|
||||||
.select(&Selector::parse(s).unwrap())
|
|
||||||
.next()
|
|
||||||
.map(|n| {
|
|
||||||
n.value()
|
n.value()
|
||||||
.attr("href")
|
.attr("href")
|
||||||
.map(str::to_string)
|
.map(str::to_string)
|
||||||
.unwrap_or_else(|| n.text().collect::<String>())
|
.unwrap_or_else(|| n.text().collect::<String>())
|
||||||
})
|
})
|
||||||
.unwrap_or_default(),
|
})?;
|
||||||
QueryMethod::Manual(ref f) => f(&result_item)?,
|
|
||||||
};
|
|
||||||
let url = normalize_url(&url)?;
|
let url = normalize_url(&url)?;
|
||||||
|
let description = description_query_method.call(&result)?;
|
||||||
let description = match description_query_method {
|
|
||||||
QueryMethod::CssSelector(s) => result_item
|
|
||||||
.select(&Selector::parse(s).unwrap())
|
|
||||||
.next()
|
|
||||||
.map(|n| n.text().collect::<String>())
|
|
||||||
.unwrap_or_default(),
|
|
||||||
QueryMethod::Manual(ref f) => f(&result_item)?,
|
|
||||||
};
|
|
||||||
|
|
||||||
search_results.push(EngineSearchResult {
|
search_results.push(EngineSearchResult {
|
||||||
url,
|
url,
|
||||||
@ -98,5 +151,30 @@ where
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(EngineResponse { search_results })
|
let featured_snippet = if !featured_snippet_query.is_empty() {
|
||||||
|
if let Some(featured_snippet) = dom
|
||||||
|
.select(&Selector::parse(featured_snippet_query).unwrap())
|
||||||
|
.next()
|
||||||
|
{
|
||||||
|
let title = featured_snippet_title_query_method.call(&featured_snippet)?;
|
||||||
|
let url = featured_snippet_href_query_method.call(&featured_snippet)?;
|
||||||
|
let url = normalize_url(&url)?;
|
||||||
|
let description = featured_snippet_description_query_method.call(&featured_snippet)?;
|
||||||
|
|
||||||
|
Some(EngineFeaturedSnippet {
|
||||||
|
url,
|
||||||
|
title,
|
||||||
|
description,
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(EngineResponse {
|
||||||
|
search_results,
|
||||||
|
featured_snippet,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
@ -7,12 +7,12 @@
|
|||||||
<link rel="stylesheet" href="/style.css">
|
<link rel="stylesheet" href="/style.css">
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<main>
|
<div class="main-container">
|
||||||
<h1>metasearch</h1>
|
<h1>metasearch</h1>
|
||||||
<form action="/search" method="get">
|
<form action="/search" method="get">
|
||||||
<input type="text" name="q" placeholder="Search" autofocus>
|
<input type="text" name="q" placeholder="Search" class="search-input" autofocus>
|
||||||
<input type="submit" value="Search">
|
<input type="submit" value="Search">
|
||||||
</form>
|
</form>
|
||||||
</main>
|
</div>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
@ -1,15 +1,20 @@
|
|||||||
|
html {
|
||||||
|
height: 100%;
|
||||||
|
}
|
||||||
body {
|
body {
|
||||||
font-family: monospace;
|
font-family: monospace;
|
||||||
background-color: #0b0e14;
|
background-color: #0b0e14;
|
||||||
color: #bfbdb6;
|
color: #bfbdb6;
|
||||||
margin: 0;
|
margin: 0;
|
||||||
line-height: 1.2;
|
line-height: 1.2;
|
||||||
|
height: 100%;
|
||||||
}
|
}
|
||||||
main {
|
main {
|
||||||
max-width: 40rem;
|
max-width: 40rem;
|
||||||
padding: 1rem 0.5rem;
|
padding: 1rem 0.5rem;
|
||||||
margin: 0 auto;
|
margin: 0 auto;
|
||||||
background-color: #0d1017;
|
background-color: #0d1017;
|
||||||
|
height: 100%;
|
||||||
}
|
}
|
||||||
input {
|
input {
|
||||||
font-family: monospace;
|
font-family: monospace;
|
||||||
@ -23,21 +28,39 @@ input[type="submit"] {
|
|||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* index page */
|
||||||
|
.main-container {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
height: 100%;
|
||||||
|
justify-content: center;
|
||||||
|
margin: 0 auto;
|
||||||
|
width: fit-content;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
h1 {
|
||||||
|
margin-top: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* header */
|
||||||
.search-form {
|
.search-form {
|
||||||
margin-bottom: 1rem;
|
margin-bottom: 1rem;
|
||||||
}
|
}
|
||||||
|
.search-input {
|
||||||
.search-result {
|
width: 20em;
|
||||||
margin-bottom: 1rem;
|
|
||||||
padding-top: 1rem;
|
|
||||||
border-top: 1px solid #234;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* search result */
|
||||||
|
.search-result {
|
||||||
|
padding-top: 1rem;
|
||||||
|
border-top: 1px solid #234;
|
||||||
|
font-size: 1rem;
|
||||||
|
}
|
||||||
.search-result-anchor {
|
.search-result-anchor {
|
||||||
color: inherit;
|
color: inherit;
|
||||||
text-decoration: none;
|
text-decoration: none;
|
||||||
|
display: block;
|
||||||
}
|
}
|
||||||
|
|
||||||
.search-result-url {
|
.search-result-url {
|
||||||
margin: 0;
|
margin: 0;
|
||||||
font-size: 0.8rem;
|
font-size: 0.8rem;
|
||||||
@ -45,26 +68,46 @@ input[type="submit"] {
|
|||||||
}
|
}
|
||||||
.search-result-title {
|
.search-result-title {
|
||||||
margin: 0;
|
margin: 0;
|
||||||
font-size: 1.2em;
|
font-size: 1rem;
|
||||||
color: #29e;
|
color: #29e;
|
||||||
}
|
}
|
||||||
.search-result-description {
|
.search-result-description {
|
||||||
margin: 0;
|
margin: 0;
|
||||||
font-size: 0.8rem;
|
font-size: 0.8em;
|
||||||
color: #bba;
|
color: #bba;
|
||||||
}
|
}
|
||||||
.search-result-engines {
|
|
||||||
|
/* engine list */
|
||||||
|
.engine-list {
|
||||||
opacity: 0.5;
|
opacity: 0.5;
|
||||||
float: right;
|
justify-content: end;
|
||||||
display: flex;
|
display: flex;
|
||||||
gap: 0.5em;
|
gap: 0.5em;
|
||||||
|
font-size: 0.8rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* featured snippet */
|
||||||
|
.featured-snippet {
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
border: 1px solid #234;
|
||||||
|
padding: 0.5rem;
|
||||||
|
font-size: 1.2rem;
|
||||||
|
}
|
||||||
|
.featured-snippet .search-result-description {
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* progress update */
|
||||||
.progress-updates {
|
.progress-updates {
|
||||||
margin-bottom: 1rem;
|
margin-bottom: 1rem;
|
||||||
border: 1px solid #234;
|
border: 1px solid #234;
|
||||||
padding: 0.5rem;
|
padding: 0.5rem;
|
||||||
|
min-height: 5em;
|
||||||
}
|
}
|
||||||
.progress-update {
|
.progress-update {
|
||||||
margin: 0;
|
margin: 0;
|
||||||
|
white-space: pre;
|
||||||
|
}
|
||||||
|
.progress-update-time {
|
||||||
|
opacity: 0.5;
|
||||||
}
|
}
|
@ -1,8 +0,0 @@
|
|||||||
use axum::{http::header, response::IntoResponse};
|
|
||||||
|
|
||||||
pub async fn route() -> impl IntoResponse {
|
|
||||||
(
|
|
||||||
[(header::CONTENT_TYPE, "text/html; charset=utf-8")],
|
|
||||||
include_str!("index.html"),
|
|
||||||
)
|
|
||||||
}
|
|
@ -1,15 +1,33 @@
|
|||||||
pub mod index;
|
|
||||||
pub mod search;
|
pub mod search;
|
||||||
pub mod style_css;
|
|
||||||
|
|
||||||
use axum::{routing::get, Router};
|
use axum::{http::header, routing::get, Router};
|
||||||
|
|
||||||
|
pub const BIND_ADDRESS: &str = "[::]:3000";
|
||||||
|
|
||||||
pub async fn run() {
|
pub async fn run() {
|
||||||
let app = Router::new()
|
let app = Router::new()
|
||||||
.route("/", get(index::route))
|
.route(
|
||||||
.route("/style.css", get(style_css::route))
|
"/",
|
||||||
|
get(|| async {
|
||||||
|
(
|
||||||
|
[(header::CONTENT_TYPE, "text/html; charset=utf-8")],
|
||||||
|
include_str!("assets/index.html"),
|
||||||
|
)
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
.route(
|
||||||
|
"/style.css",
|
||||||
|
get(|| async {
|
||||||
|
(
|
||||||
|
[(header::CONTENT_TYPE, "text/css; charset=utf-8")],
|
||||||
|
include_str!("assets/style.css"),
|
||||||
|
)
|
||||||
|
}),
|
||||||
|
)
|
||||||
.route("/search", get(search::route));
|
.route("/search", get(search::route));
|
||||||
|
|
||||||
let listener = tokio::net::TcpListener::bind("0.0.0.0:3000").await.unwrap();
|
println!("Listening on {BIND_ADDRESS}");
|
||||||
|
|
||||||
|
let listener = tokio::net::TcpListener::bind(BIND_ADDRESS).await.unwrap();
|
||||||
axum::serve(listener, app).await.unwrap();
|
axum::serve(listener, app).await.unwrap();
|
||||||
}
|
}
|
||||||
|
@ -10,7 +10,7 @@ use axum::{
|
|||||||
use bytes::Bytes;
|
use bytes::Bytes;
|
||||||
use html_escape::{encode_text, encode_unquoted_attribute};
|
use html_escape::{encode_text, encode_unquoted_attribute};
|
||||||
|
|
||||||
use crate::engines;
|
use crate::engines::{self, Response};
|
||||||
|
|
||||||
fn render_beginning_of_html(query: &str) -> String {
|
fn render_beginning_of_html(query: &str) -> String {
|
||||||
format!(
|
format!(
|
||||||
@ -25,7 +25,7 @@ fn render_beginning_of_html(query: &str) -> String {
|
|||||||
<body>
|
<body>
|
||||||
<main>
|
<main>
|
||||||
<form action="/search" method="get" class="search-form">
|
<form action="/search" method="get" class="search-form">
|
||||||
<input type="text" name="q" placeholder="Search" value="{}">
|
<input type="text" name="q" placeholder="Search" value="{}" class="search-input" autofocus>
|
||||||
<input type="submit" value="Search">
|
<input type="submit" value="Search">
|
||||||
</form>
|
</form>
|
||||||
<div class="progress-updates">
|
<div class="progress-updates">
|
||||||
@ -39,19 +39,18 @@ fn render_end_of_html() -> String {
|
|||||||
r#"</main></body></html>"#.to_string()
|
r#"</main></body></html>"#.to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn render_search_result(result: &engines::SearchResult) -> String {
|
fn render_engine_list(engines: &[engines::Engine]) -> String {
|
||||||
let engines_html = result
|
let mut html = String::new();
|
||||||
.engines
|
for engine in engines {
|
||||||
.iter()
|
html.push_str(&format!(
|
||||||
.map(|engine| {
|
r#"<span class="engine-list-item">{engine}</span>"#,
|
||||||
format!(
|
engine = encode_text(&engine.name())
|
||||||
r#"<span class="search-result-engines-item">{}</span>"#,
|
));
|
||||||
encode_text(&engine.name())
|
}
|
||||||
)
|
format!(r#"<div class="engine-list">{html}</div>"#)
|
||||||
})
|
}
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.join("");
|
|
||||||
|
|
||||||
|
fn render_search_result(result: &engines::SearchResult) -> String {
|
||||||
format!(
|
format!(
|
||||||
r#"<div class="search-result">
|
r#"<div class="search-result">
|
||||||
<a class="search-result-anchor" href="{url_attr}">
|
<a class="search-result-anchor" href="{url_attr}">
|
||||||
@ -59,16 +58,47 @@ fn render_search_result(result: &engines::SearchResult) -> String {
|
|||||||
<h3 class="search-result-title">{title}</h3>
|
<h3 class="search-result-title">{title}</h3>
|
||||||
</a>
|
</a>
|
||||||
<p class="search-result-description">{desc}</p>
|
<p class="search-result-description">{desc}</p>
|
||||||
<div class="search-result-engines">{engines_html}</div>
|
{engines_html}
|
||||||
</div>
|
</div>
|
||||||
"#,
|
"#,
|
||||||
url_attr = encode_unquoted_attribute(&result.url),
|
url_attr = encode_unquoted_attribute(&result.url),
|
||||||
url = encode_text(&result.url),
|
url = encode_text(&result.url),
|
||||||
title = encode_text(&result.title),
|
title = encode_text(&result.title),
|
||||||
desc = encode_text(&result.description)
|
desc = encode_text(&result.description),
|
||||||
|
engines_html = render_engine_list(&result.engines.iter().copied().collect::<Vec<_>>())
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn render_featured_snippet(featured_snippet: &engines::FeaturedSnippet) -> String {
|
||||||
|
format!(
|
||||||
|
r#"<div class="featured-snippet">
|
||||||
|
<p class="search-result-description">{desc}</p>
|
||||||
|
<a class="search-result-anchor" href="{url_attr}">
|
||||||
|
<span class="search-result-url" href="{url_attr}">{url}</span>
|
||||||
|
<h3 class="search-result-title">{title}</h3>
|
||||||
|
</a>
|
||||||
|
{engines_html}
|
||||||
|
</div>
|
||||||
|
"#,
|
||||||
|
desc = encode_text(&featured_snippet.description),
|
||||||
|
url_attr = encode_unquoted_attribute(&featured_snippet.url),
|
||||||
|
url = encode_text(&featured_snippet.url),
|
||||||
|
title = encode_text(&featured_snippet.title),
|
||||||
|
engines_html = render_engine_list(&[featured_snippet.engine])
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn render_results(response: Response) -> String {
|
||||||
|
let mut html = String::new();
|
||||||
|
if let Some(featured_snippet) = response.featured_snippet {
|
||||||
|
html.push_str(&render_featured_snippet(&featured_snippet));
|
||||||
|
}
|
||||||
|
for result in &response.search_results {
|
||||||
|
html.push_str(&render_search_result(result));
|
||||||
|
}
|
||||||
|
html
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn route(Query(params): Query<HashMap<String, String>>) -> impl IntoResponse {
|
pub async fn route(Query(params): Query<HashMap<String, String>>) -> impl IntoResponse {
|
||||||
let query = params
|
let query = params
|
||||||
.get("q")
|
.get("q")
|
||||||
@ -99,8 +129,7 @@ pub async fn route(Query(params): Query<HashMap<String, String>>) -> impl IntoRe
|
|||||||
|
|
||||||
while let Some(progress_update) = progress_rx.recv().await {
|
while let Some(progress_update) = progress_rx.recv().await {
|
||||||
let progress_html = format!(
|
let progress_html = format!(
|
||||||
r#"<p class="progress-update">{}</p>"#,
|
r#"<p class="progress-update">{progress_update}</p>"#
|
||||||
encode_text(&progress_update.to_string())
|
|
||||||
);
|
);
|
||||||
yield R::Ok(Bytes::from(progress_html));
|
yield R::Ok(Bytes::from(progress_html));
|
||||||
}
|
}
|
||||||
@ -121,9 +150,7 @@ pub async fn route(Query(params): Query<HashMap<String, String>>) -> impl IntoRe
|
|||||||
|
|
||||||
second_half.push_str("</div>"); // close progress-updates
|
second_half.push_str("</div>"); // close progress-updates
|
||||||
second_half.push_str("<style>.progress-updates{display:none}</style>");
|
second_half.push_str("<style>.progress-updates{display:none}</style>");
|
||||||
for result in results.search_results {
|
second_half.push_str(&render_results(results));
|
||||||
second_half.push_str(&render_search_result(&result));
|
|
||||||
}
|
|
||||||
second_half.push_str(&render_end_of_html());
|
second_half.push_str(&render_end_of_html());
|
||||||
|
|
||||||
yield Ok(Bytes::from(second_half));
|
yield Ok(Bytes::from(second_half));
|
||||||
|
@ -1,8 +0,0 @@
|
|||||||
use axum::{http::header, response::IntoResponse};
|
|
||||||
|
|
||||||
pub async fn route() -> impl IntoResponse {
|
|
||||||
(
|
|
||||||
[(header::CONTENT_TYPE, "text/css; charset=utf-8")],
|
|
||||||
include_str!("style.css"),
|
|
||||||
)
|
|
||||||
}
|
|
Loading…
Reference in New Issue
Block a user