docs.rs and stackexchange support
This commit is contained in:
parent
68961193a8
commit
540d01981c
6
README
6
README
@ -5,9 +5,9 @@ it sources from google, bing, brave, and a few others.
|
|||||||
it's written in rust using no templating engine and with as little client-side
|
it's written in rust using no templating engine and with as little client-side
|
||||||
javascript as possible.
|
javascript as possible.
|
||||||
|
|
||||||
metasearch is a single binary with no cli or configuration file. if you want to
|
metasearch2 is a single binary with no cli or configuration file. if you want
|
||||||
configure it (like to change the default port or weights of engines) then you
|
to configure it (like to change the default port or weights of engines) then
|
||||||
have to modify the source.
|
you have to modify the source.
|
||||||
|
|
||||||
build it with `cargo b -r`, the resulting binary will be in
|
build it with `cargo b -r`, the resulting binary will be in
|
||||||
`target/release/metasearch2`. it runs on port 28019.
|
`target/release/metasearch2`. it runs on port 28019.
|
||||||
|
@ -27,8 +27,9 @@ pub enum Engine {
|
|||||||
Calc,
|
Calc,
|
||||||
Wikipedia,
|
Wikipedia,
|
||||||
// post-search
|
// post-search
|
||||||
StackOverflow,
|
StackExchange,
|
||||||
GitHub,
|
GitHub,
|
||||||
|
DocsRs,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Engine {
|
impl Engine {
|
||||||
@ -41,8 +42,9 @@ impl Engine {
|
|||||||
Engine::Ip,
|
Engine::Ip,
|
||||||
Engine::Calc,
|
Engine::Calc,
|
||||||
Engine::Wikipedia,
|
Engine::Wikipedia,
|
||||||
Engine::StackOverflow,
|
Engine::StackExchange,
|
||||||
Engine::GitHub,
|
Engine::GitHub,
|
||||||
|
Engine::DocsRs,
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -55,8 +57,9 @@ impl Engine {
|
|||||||
Engine::Ip => "ip",
|
Engine::Ip => "ip",
|
||||||
Engine::Calc => "calc",
|
Engine::Calc => "calc",
|
||||||
Engine::Wikipedia => "wikipedia",
|
Engine::Wikipedia => "wikipedia",
|
||||||
Engine::StackOverflow => "stackoverflow",
|
Engine::StackExchange => "stackexchange",
|
||||||
Engine::GitHub => "github",
|
Engine::GitHub => "github",
|
||||||
|
Engine::DocsRs => "docs.rs",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -109,16 +112,18 @@ impl Engine {
|
|||||||
|
|
||||||
pub fn postsearch_request(&self, response: &Response) -> Option<reqwest::RequestBuilder> {
|
pub fn postsearch_request(&self, response: &Response) -> Option<reqwest::RequestBuilder> {
|
||||||
match self {
|
match self {
|
||||||
Engine::StackOverflow => postsearch::stackoverflow::request(response),
|
Engine::StackExchange => postsearch::stackexchange::request(response),
|
||||||
Engine::GitHub => postsearch::github::request(response),
|
Engine::GitHub => postsearch::github::request(response),
|
||||||
|
Engine::DocsRs => postsearch::docs_rs::request(response),
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn postsearch_parse_response(&self, body: &str) -> Option<String> {
|
pub fn postsearch_parse_response(&self, body: &str) -> Option<String> {
|
||||||
match self {
|
match self {
|
||||||
Engine::StackOverflow => postsearch::stackoverflow::parse_response(body),
|
Engine::StackExchange => postsearch::stackexchange::parse_response(body),
|
||||||
Engine::GitHub => postsearch::github::parse_response(body),
|
Engine::GitHub => postsearch::github::parse_response(body),
|
||||||
|
Engine::DocsRs => postsearch::docs_rs::parse_response(body),
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -372,6 +377,8 @@ pub async fn search_with_engines(
|
|||||||
ProgressUpdateData::PostSearchInfobox(Infobox { html, engine }),
|
ProgressUpdateData::PostSearchInfobox(Infobox { html, engine }),
|
||||||
start_time,
|
start_time,
|
||||||
))?;
|
))?;
|
||||||
|
// break so we don't send multiple infoboxes
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,5 +2,6 @@
|
|||||||
//! results. They can only show stuff in infoboxes and don't get requested if
|
//! results. They can only show stuff in infoboxes and don't get requested if
|
||||||
//! an infobox was added by another earlier engine.
|
//! an infobox was added by another earlier engine.
|
||||||
|
|
||||||
|
pub mod docs_rs;
|
||||||
pub mod github;
|
pub mod github;
|
||||||
pub mod stackoverflow;
|
pub mod stackexchange;
|
||||||
|
63
src/engines/postsearch/docs_rs.rs
Normal file
63
src/engines/postsearch/docs_rs.rs
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
use reqwest::Url;
|
||||||
|
use scraper::{Html, Selector};
|
||||||
|
|
||||||
|
use crate::engines::{Response, CLIENT};
|
||||||
|
|
||||||
|
pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
|
||||||
|
for search_result in response.search_results.iter().take(8) {
|
||||||
|
if search_result.url.starts_with("https://docs.rs/") {
|
||||||
|
return Some(CLIENT.get(search_result.url.as_str()).header(
|
||||||
|
"User-Agent",
|
||||||
|
"Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse_response(body: &str) -> Option<String> {
|
||||||
|
let dom = Html::parse_document(body);
|
||||||
|
|
||||||
|
let title = dom
|
||||||
|
.select(&Selector::parse("h2 a").unwrap())
|
||||||
|
.next()?
|
||||||
|
.text()
|
||||||
|
.collect::<String>();
|
||||||
|
let version = dom
|
||||||
|
.select(&Selector::parse("h2 .version").unwrap())
|
||||||
|
.next()?
|
||||||
|
.text()
|
||||||
|
.collect::<String>();
|
||||||
|
|
||||||
|
let url = Url::join(
|
||||||
|
&Url::parse("https://docs.rs").unwrap(),
|
||||||
|
&dom.select(
|
||||||
|
&Selector::parse("ul.pure-menu-list li.pure-menu-item:nth-last-child(2) a").unwrap(),
|
||||||
|
)
|
||||||
|
.next()?
|
||||||
|
.value()
|
||||||
|
.attr("href")?
|
||||||
|
.replace("/crate/", "/"),
|
||||||
|
)
|
||||||
|
.ok()?;
|
||||||
|
|
||||||
|
let doc_query = Selector::parse(".docblock").unwrap();
|
||||||
|
|
||||||
|
let doc = dom.select(&doc_query).next()?;
|
||||||
|
let doc_html = doc.inner_html();
|
||||||
|
let doc_html = ammonia::Builder::default()
|
||||||
|
.link_rel(None)
|
||||||
|
.url_relative(ammonia::UrlRelative::RewriteWithBase(
|
||||||
|
Url::parse("https://docs.rs").unwrap(),
|
||||||
|
))
|
||||||
|
.clean(&doc_html)
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
Some(format!(
|
||||||
|
r#"<h2>Crate <a href="{url}">{title} {version}</a></h2>
|
||||||
|
<div class="infobox-docs.rs-answer">{doc_html}</div>"#,
|
||||||
|
url = html_escape::encode_quoted_attribute(&url.to_string()),
|
||||||
|
title = html_escape::encode_text(&title),
|
||||||
|
))
|
||||||
|
}
|
@ -1,13 +1,12 @@
|
|||||||
use reqwest::Url;
|
use reqwest::Url;
|
||||||
use scraper::{Html, Selector};
|
use scraper::{Html, Selector};
|
||||||
|
|
||||||
use crate::engines::{Response, CLIENT};
|
use crate::engines::{answer::regex, Response, CLIENT};
|
||||||
|
|
||||||
pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
|
pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
|
||||||
for search_result in response.search_results.iter().take(8) {
|
for search_result in response.search_results.iter().take(8) {
|
||||||
if search_result
|
if regex!(r"^https:\/\/(stackoverflow\.com|serverfault\.com|superuser\.com|\w{1,}\.stackexchange\.com)\/questions\/\d+")
|
||||||
.url
|
.is_match(&search_result.url)
|
||||||
.starts_with("https://stackoverflow.com/questions/")
|
|
||||||
{
|
{
|
||||||
return Some(CLIENT.get(search_result.url.as_str()).header(
|
return Some(CLIENT.get(search_result.url.as_str()).header(
|
||||||
"User-Agent",
|
"User-Agent",
|
||||||
@ -27,8 +26,14 @@ pub fn parse_response(body: &str) -> Option<String> {
|
|||||||
.next()?
|
.next()?
|
||||||
.text()
|
.text()
|
||||||
.collect::<String>();
|
.collect::<String>();
|
||||||
|
|
||||||
|
let base_url = dom
|
||||||
|
.select(&Selector::parse("link[rel=canonical]").unwrap())
|
||||||
|
.next()?
|
||||||
|
.value()
|
||||||
|
.attr("href")?;
|
||||||
let url = Url::join(
|
let url = Url::join(
|
||||||
&Url::parse("https://stackoverflow.com").unwrap(),
|
&Url::parse(base_url).unwrap(),
|
||||||
dom.select(&Selector::parse(".question-hyperlink").unwrap())
|
dom.select(&Selector::parse(".question-hyperlink").unwrap())
|
||||||
.next()?
|
.next()?
|
||||||
.value()
|
.value()
|
||||||
@ -50,7 +55,7 @@ pub fn parse_response(body: &str) -> Option<String> {
|
|||||||
|
|
||||||
Some(format!(
|
Some(format!(
|
||||||
r#"<a href="{url}"><h2>{title}</h2></a>
|
r#"<a href="{url}"><h2>{title}</h2></a>
|
||||||
<div class="infobox-stackoverflow-answer">{answer_html}</div>"#,
|
<div class="infobox-stackexchange-answer">{answer_html}</div>"#,
|
||||||
url = html_escape::encode_quoted_attribute(&url.to_string()),
|
url = html_escape::encode_quoted_attribute(&url.to_string()),
|
||||||
title = html_escape::encode_text(&title),
|
title = html_escape::encode_text(&title),
|
||||||
))
|
))
|
@ -143,7 +143,6 @@ pub(super) fn parse_html_response_with_opts(
|
|||||||
.unwrap_or_else(|| n.text().collect::<String>())
|
.unwrap_or_else(|| n.text().collect::<String>())
|
||||||
})
|
})
|
||||||
})?;
|
})?;
|
||||||
let url = normalize_url(&url)?;
|
|
||||||
let description = description_query_method.call(&result)?;
|
let description = description_query_method.call(&result)?;
|
||||||
|
|
||||||
// this can happen on google if you search "roll d6"
|
// this can happen on google if you search "roll d6"
|
||||||
@ -152,6 +151,13 @@ pub(super) fn parse_html_response_with_opts(
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// this can happen on google if it gives you a featured snippet
|
||||||
|
if description.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let url = normalize_url(&url)?;
|
||||||
|
|
||||||
search_results.push(EngineSearchResult {
|
search_results.push(EngineSearchResult {
|
||||||
url,
|
url,
|
||||||
title,
|
title,
|
||||||
|
@ -10,7 +10,7 @@ let lastValue = "";
|
|||||||
async function updateSuggestions() {
|
async function updateSuggestions() {
|
||||||
const value = searchInputEl.value;
|
const value = searchInputEl.value;
|
||||||
|
|
||||||
if (value.trim() === "") {
|
if (value.trim() === "" || value.length > 65) {
|
||||||
renderSuggestions([]);
|
renderSuggestions([]);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -222,14 +222,14 @@ h1 {
|
|||||||
.infobox p {
|
.infobox p {
|
||||||
margin: 0;
|
margin: 0;
|
||||||
}
|
}
|
||||||
.infobox-stackoverflow-answer pre > code,
|
.infobox-stackexchange-answer pre > code,
|
||||||
.infobox-github-readme pre {
|
.infobox-github-readme pre {
|
||||||
border: 1px solid #234;
|
border: 1px solid #234;
|
||||||
padding: 0.5rem;
|
padding: 0.5rem;
|
||||||
display: block;
|
display: block;
|
||||||
font-weight: normal;
|
font-weight: normal;
|
||||||
}
|
}
|
||||||
.infobox-stackoverflow-answer code,
|
.infobox-stackexchange-answer code,
|
||||||
.infobox-github-readme code {
|
.infobox-github-readme code {
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
}
|
}
|
||||||
|
@ -14,7 +14,7 @@ pub async fn route(Query(params): Query<HashMap<String, String>>) -> impl IntoRe
|
|||||||
let res = match engines::autocomplete(&query).await {
|
let res = match engines::autocomplete(&query).await {
|
||||||
Ok(res) => res,
|
Ok(res) => res,
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
eprintln!("Error: {}", err);
|
eprintln!("Autocomplete error for {query}: {}", err);
|
||||||
return (StatusCode::INTERNAL_SERVER_ERROR, Json((query, vec![])));
|
return (StatusCode::INTERNAL_SERVER_ERROR, Json((query, vec![])));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user