docs.rs and stackexchange support
This commit is contained in:
parent
68961193a8
commit
540d01981c
6
README
6
README
@ -5,9 +5,9 @@ it sources from google, bing, brave, and a few others.
|
||||
it's written in rust using no templating engine and with as little client-side
|
||||
javascript as possible.
|
||||
|
||||
metasearch is a single binary with no cli or configuration file. if you want to
|
||||
configure it (like to change the default port or weights of engines) then you
|
||||
have to modify the source.
|
||||
metasearch2 is a single binary with no cli or configuration file. if you want
|
||||
to configure it (like to change the default port or weights of engines) then
|
||||
you have to modify the source.
|
||||
|
||||
build it with `cargo b -r`, the resulting binary will be in
|
||||
`target/release/metasearch2`. it runs on port 28019.
|
||||
|
@ -27,8 +27,9 @@ pub enum Engine {
|
||||
Calc,
|
||||
Wikipedia,
|
||||
// post-search
|
||||
StackOverflow,
|
||||
StackExchange,
|
||||
GitHub,
|
||||
DocsRs,
|
||||
}
|
||||
|
||||
impl Engine {
|
||||
@ -41,8 +42,9 @@ impl Engine {
|
||||
Engine::Ip,
|
||||
Engine::Calc,
|
||||
Engine::Wikipedia,
|
||||
Engine::StackOverflow,
|
||||
Engine::StackExchange,
|
||||
Engine::GitHub,
|
||||
Engine::DocsRs,
|
||||
]
|
||||
}
|
||||
|
||||
@ -55,8 +57,9 @@ impl Engine {
|
||||
Engine::Ip => "ip",
|
||||
Engine::Calc => "calc",
|
||||
Engine::Wikipedia => "wikipedia",
|
||||
Engine::StackOverflow => "stackoverflow",
|
||||
Engine::StackExchange => "stackexchange",
|
||||
Engine::GitHub => "github",
|
||||
Engine::DocsRs => "docs.rs",
|
||||
}
|
||||
}
|
||||
|
||||
@ -109,16 +112,18 @@ impl Engine {
|
||||
|
||||
/// Builds the follow-up HTTP request for a post-search engine.
///
/// Dispatches to the `request` function of the matching `postsearch` module,
/// handing it the search `response`. Any engine without an arm here yields
/// `None`.
pub fn postsearch_request(&self, response: &Response) -> Option<reqwest::RequestBuilder> {
|
||||
match self {
|
||||
Engine::StackOverflow => postsearch::stackoverflow::request(response),
|
||||
Engine::StackExchange => postsearch::stackexchange::request(response),
|
||||
Engine::GitHub => postsearch::github::request(response),
|
||||
Engine::DocsRs => postsearch::docs_rs::request(response),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Turns the body of a post-search response into infobox HTML.
///
/// Dispatches to the `parse_response` function of the matching `postsearch`
/// module, handing it `body`. Any engine without an arm here yields `None`.
pub fn postsearch_parse_response(&self, body: &str) -> Option<String> {
|
||||
match self {
|
||||
Engine::StackOverflow => postsearch::stackoverflow::parse_response(body),
|
||||
Engine::StackExchange => postsearch::stackexchange::parse_response(body),
|
||||
Engine::GitHub => postsearch::github::parse_response(body),
|
||||
Engine::DocsRs => postsearch::docs_rs::parse_response(body),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@ -372,6 +377,8 @@ pub async fn search_with_engines(
|
||||
ProgressUpdateData::PostSearchInfobox(Infobox { html, engine }),
|
||||
start_time,
|
||||
))?;
|
||||
// break so we don't send multiple infoboxes
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2,5 +2,6 @@
|
||||
//! results. They can only show stuff in infoboxes and don't get requested if
|
||||
//! an infobox was added by another earlier engine.
|
||||
|
||||
pub mod docs_rs;
|
||||
pub mod github;
|
||||
pub mod stackoverflow;
|
||||
pub mod stackexchange;
|
||||
|
63
src/engines/postsearch/docs_rs.rs
Normal file
63
src/engines/postsearch/docs_rs.rs
Normal file
@ -0,0 +1,63 @@
|
||||
use reqwest::Url;
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use crate::engines::{Response, CLIENT};
|
||||
|
||||
pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
|
||||
for search_result in response.search_results.iter().take(8) {
|
||||
if search_result.url.starts_with("https://docs.rs/") {
|
||||
return Some(CLIENT.get(search_result.url.as_str()).header(
|
||||
"User-Agent",
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
pub fn parse_response(body: &str) -> Option<String> {
|
||||
let dom = Html::parse_document(body);
|
||||
|
||||
let title = dom
|
||||
.select(&Selector::parse("h2 a").unwrap())
|
||||
.next()?
|
||||
.text()
|
||||
.collect::<String>();
|
||||
let version = dom
|
||||
.select(&Selector::parse("h2 .version").unwrap())
|
||||
.next()?
|
||||
.text()
|
||||
.collect::<String>();
|
||||
|
||||
let url = Url::join(
|
||||
&Url::parse("https://docs.rs").unwrap(),
|
||||
&dom.select(
|
||||
&Selector::parse("ul.pure-menu-list li.pure-menu-item:nth-last-child(2) a").unwrap(),
|
||||
)
|
||||
.next()?
|
||||
.value()
|
||||
.attr("href")?
|
||||
.replace("/crate/", "/"),
|
||||
)
|
||||
.ok()?;
|
||||
|
||||
let doc_query = Selector::parse(".docblock").unwrap();
|
||||
|
||||
let doc = dom.select(&doc_query).next()?;
|
||||
let doc_html = doc.inner_html();
|
||||
let doc_html = ammonia::Builder::default()
|
||||
.link_rel(None)
|
||||
.url_relative(ammonia::UrlRelative::RewriteWithBase(
|
||||
Url::parse("https://docs.rs").unwrap(),
|
||||
))
|
||||
.clean(&doc_html)
|
||||
.to_string();
|
||||
|
||||
Some(format!(
|
||||
r#"<h2>Crate <a href="{url}">{title} {version}</a></h2>
|
||||
<div class="infobox-docs.rs-answer">{doc_html}</div>"#,
|
||||
url = html_escape::encode_quoted_attribute(&url.to_string()),
|
||||
title = html_escape::encode_text(&title),
|
||||
))
|
||||
}
|
@ -1,13 +1,12 @@
|
||||
use reqwest::Url;
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
use crate::engines::{Response, CLIENT};
|
||||
use crate::engines::{answer::regex, Response, CLIENT};
|
||||
|
||||
pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
|
||||
for search_result in response.search_results.iter().take(8) {
|
||||
if search_result
|
||||
.url
|
||||
.starts_with("https://stackoverflow.com/questions/")
|
||||
if regex!(r"^https:\/\/(stackoverflow\.com|serverfault\.com|superuser\.com|\w{1,}\.stackexchange\.com)\/questions\/\d+")
|
||||
.is_match(&search_result.url)
|
||||
{
|
||||
return Some(CLIENT.get(search_result.url.as_str()).header(
|
||||
"User-Agent",
|
||||
@ -27,8 +26,14 @@ pub fn parse_response(body: &str) -> Option<String> {
|
||||
.next()?
|
||||
.text()
|
||||
.collect::<String>();
|
||||
|
||||
let base_url = dom
|
||||
.select(&Selector::parse("link[rel=canonical]").unwrap())
|
||||
.next()?
|
||||
.value()
|
||||
.attr("href")?;
|
||||
let url = Url::join(
|
||||
&Url::parse("https://stackoverflow.com").unwrap(),
|
||||
&Url::parse(base_url).unwrap(),
|
||||
dom.select(&Selector::parse(".question-hyperlink").unwrap())
|
||||
.next()?
|
||||
.value()
|
||||
@ -50,7 +55,7 @@ pub fn parse_response(body: &str) -> Option<String> {
|
||||
|
||||
Some(format!(
|
||||
r#"<a href="{url}"><h2>{title}</h2></a>
|
||||
<div class="infobox-stackoverflow-answer">{answer_html}</div>"#,
|
||||
<div class="infobox-stackexchange-answer">{answer_html}</div>"#,
|
||||
url = html_escape::encode_quoted_attribute(&url.to_string()),
|
||||
title = html_escape::encode_text(&title),
|
||||
))
|
@ -143,7 +143,6 @@ pub(super) fn parse_html_response_with_opts(
|
||||
.unwrap_or_else(|| n.text().collect::<String>())
|
||||
})
|
||||
})?;
|
||||
let url = normalize_url(&url)?;
|
||||
let description = description_query_method.call(&result)?;
|
||||
|
||||
// this can happen on google if you search "roll d6"
|
||||
@ -152,6 +151,13 @@ pub(super) fn parse_html_response_with_opts(
|
||||
continue;
|
||||
}
|
||||
|
||||
// this can happen on google if it gives you a featured snippet
|
||||
if description.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let url = normalize_url(&url)?;
|
||||
|
||||
search_results.push(EngineSearchResult {
|
||||
url,
|
||||
title,
|
||||
|
@ -10,7 +10,7 @@ let lastValue = "";
|
||||
async function updateSuggestions() {
|
||||
const value = searchInputEl.value;
|
||||
|
||||
if (value.trim() === "") {
|
||||
if (value.trim() === "" || value.length > 65) {
|
||||
renderSuggestions([]);
|
||||
return;
|
||||
}
|
||||
|
@ -222,14 +222,14 @@ h1 {
|
||||
.infobox p {
|
||||
margin: 0;
|
||||
}
|
||||
.infobox-stackoverflow-answer pre > code,
|
||||
.infobox-stackexchange-answer pre > code,
|
||||
.infobox-github-readme pre {
|
||||
border: 1px solid #234;
|
||||
padding: 0.5rem;
|
||||
display: block;
|
||||
font-weight: normal;
|
||||
}
|
||||
.infobox-stackoverflow-answer code,
|
||||
.infobox-stackexchange-answer code,
|
||||
.infobox-github-readme code {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
@ -14,7 +14,7 @@ pub async fn route(Query(params): Query<HashMap<String, String>>) -> impl IntoRe
|
||||
let res = match engines::autocomplete(&query).await {
|
||||
Ok(res) => res,
|
||||
Err(err) => {
|
||||
eprintln!("Error: {}", err);
|
||||
eprintln!("Autocomplete error for {query}: {}", err);
|
||||
return (StatusCode::INTERNAL_SERVER_ERROR, Json((query, vec![])));
|
||||
}
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user