github infobox

This commit is contained in:
mat 2023-12-20 23:17:39 -06:00
parent f95c5fe273
commit 60914e6e7f
9 changed files with 93 additions and 3 deletions

20
Cargo.lock generated
View File

@ -39,6 +39,19 @@ dependencies = [
"memchr",
]
[[package]]
name = "ammonia"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64e6d1c7838db705c9b756557ee27c384ce695a1c51a6fe528784cb1c6840170"
dependencies = [
"html5ever",
"maplit",
"once_cell",
"tendril",
"url",
]
[[package]]
name = "anyhow"
version = "1.0.75"
@ -753,6 +766,12 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "maplit"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
[[package]]
name = "markup5ever"
version = "0.11.0"
@ -792,6 +811,7 @@ checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
name = "metasearch2"
version = "0.1.0"
dependencies = [
"ammonia",
"anyhow",
"async-stream",
"axum",

View File

@ -6,6 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
ammonia = "3.3.0"
anyhow = "1.0.75"
async-stream = "0.3.5"
axum = { version = "0.7.2", features = ["http2"] }

View File

@ -6,7 +6,6 @@ use serde::Deserialize;
use crate::engines::{EngineResponse, CLIENT};
pub fn request(query: &str) -> reqwest::RequestBuilder {
println!("request wikipedia");
CLIENT
.get(
Url::parse_with_params(

View File

@ -28,6 +28,7 @@ pub enum Engine {
Wikipedia,
// post-search
StackOverflow,
GitHub,
}
impl Engine {
@ -41,6 +42,7 @@ impl Engine {
Engine::Calc,
Engine::Wikipedia,
Engine::StackOverflow,
Engine::GitHub,
]
}
@ -54,6 +56,7 @@ impl Engine {
Engine::Calc => "calc",
Engine::Wikipedia => "wikipedia",
Engine::StackOverflow => "stackoverflow",
Engine::GitHub => "github",
}
}
@ -107,6 +110,7 @@ impl Engine {
/// Builds the follow-up ("post-search") request for this engine, based on
/// the search `response` gathered so far.
///
/// Only the post-search engines handled below can produce a request; every
/// other engine yields `None`. The selected engine's own `request` function
/// may also return `None`.
pub fn postsearch_request(&self, response: &Response) -> Option<reqwest::RequestBuilder> {
    match *self {
        Engine::GitHub => postsearch::github::request(response),
        Engine::StackOverflow => postsearch::stackoverflow::request(response),
        // not a post-search engine
        _ => None,
    }
}
@ -114,6 +118,7 @@ impl Engine {
/// Parses the `body` returned for this engine's post-search request into an
/// HTML snippet.
///
/// Dispatches to the matching post-search engine's `parse_response`; every
/// other engine yields `None`.
pub fn postsearch_parse_response(&self, body: &str) -> Option<String> {
    match *self {
        Engine::GitHub => postsearch::github::parse_response(body),
        Engine::StackOverflow => postsearch::stackoverflow::parse_response(body),
        // not a post-search engine
        _ => None,
    }
}

View File

@ -2,4 +2,5 @@
//! results. They can only show stuff in infoboxes and don't get requested if
//! an infobox was added by another earlier engine.
pub mod github;
pub mod stackoverflow;

View File

@ -0,0 +1,56 @@
use reqwest::Url;
use scraper::{Html, Selector};
use crate::engines::{Response, CLIENT};
/// Builds a GET request for the first GitHub link among the top search
/// results.
///
/// Only the first eight entries of `response.search_results` are inspected.
/// The first one whose URL starts with `https://github.com/` is requested
/// with a Firefox `User-Agent` header. Returns `None` when none of them
/// matches.
pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
    response
        .search_results
        .iter()
        .take(8)
        .find(|candidate| candidate.url.starts_with("https://github.com/"))
        .map(|candidate| {
            CLIENT.get(candidate.url.as_str()).header(
                "User-Agent",
                "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
            )
        })
}
/// Turns the HTML of a GitHub page into an infobox snippet.
///
/// The snippet is a heading that links to the URL taken from the first link
/// inside `main #repository-container-header`, followed by the sanitized
/// contents of the first `<article>` element (presumably the rendered
/// README; confirm against GitHub's current markup).
///
/// Returns `None` when the header link, its `href`, the `<article>`, or an
/// `<h1>` inside the sanitized article cannot be found, or when the `href`
/// cannot be joined onto `https://github.com`.
pub fn parse_response(body: &str) -> Option<String> {
    // Constant inputs: these parses cannot fail at runtime.
    let github_base = Url::parse("https://github.com").unwrap();
    let header_link_selector = Selector::parse("main #repository-container-header a").unwrap();
    let article_selector = Selector::parse("article").unwrap();
    let heading_selector = Selector::parse("h1").unwrap();

    let page = Html::parse_document(body);

    // Resolve the header link against the GitHub origin.
    let href = page
        .select(&header_link_selector)
        .next()?
        .value()
        .attr("href")?;
    let url = github_base.join(href).ok()?.to_string();

    // Sanitize the article's inner HTML; relative links are rewritten against
    // the GitHub origin and no `rel` attribute is added to links.
    let article = page.select(&article_selector).next()?;
    let raw_readme = article.inner_html();
    let sanitized = ammonia::Builder::default()
        .link_rel(None)
        .url_relative(ammonia::UrlRelative::RewriteWithBase(github_base))
        .clean(raw_readme.trim())
        .to_string();

    // The first <h1> becomes the infobox title. If the sanitized HTML begins
    // with that heading, drop it from the body so it is not shown twice.
    let fragment = Html::parse_fragment(&sanitized);
    let heading = fragment.select(&heading_selector).next()?;
    let heading_markup = heading.html();
    let readme_html = sanitized
        .strip_prefix(heading_markup.trim())
        .unwrap_or(&sanitized);
    let title: String = heading.text().collect();

    Some(format!(
        r#"<a href="{url}"><h1>{title}</h1></a>
<div class="infobox-github-readme">{readme_html}</div>"#,
        url = html_escape::encode_quoted_attribute(&url),
        title = html_escape::encode_text(&title),
    ))
}

View File

@ -49,7 +49,7 @@ pub fn parse_response(body: &str) -> Option<String> {
let url = format!("{url}#{answer_id}");
Some(format!(
r#"<a href="{url}" class="title"><h2>{title}</h2></a>
r#"<a href="{url}"><h2>{title}</h2></a>
<div class="infobox-stackoverflow-answer">{answer_html}</div>"#,
url = html_escape::encode_quoted_attribute(&url.to_string()),
title = html_escape::encode_text(&title),

View File

@ -22,3 +22,11 @@ searchInputEl.addEventListener("input", async (e) => {
datalistEl.appendChild(optionEl);
});
});
// If the user starts typing while the search input is not focused, move focus
// to it so the keystroke lands in the search box.
document.addEventListener("keydown", (e) => {
  // Leave keyboard shortcuts (Ctrl+C, Cmd+R, Alt+D, ...) alone: their `key` is
  // also a single letter or digit, and stealing focus for them would disturb
  // the page selection or the shortcut's target.
  if (e.ctrlKey || e.metaKey || e.altKey) {
    return;
  }
  // must be a letter or number
  if (e.key.match(/^[a-z0-9]$/i) && !searchInputEl.matches(":focus")) {
    searchInputEl.focus();
  }
});

View File

@ -11,7 +11,7 @@ use bytes::Bytes;
use html_escape::{encode_text, encode_unquoted_attribute};
use crate::engines::{
self, Engine, EngineProgressUpdate, ProgressUpdate, ProgressUpdateData, Response, SearchQuery,
self, Engine, EngineProgressUpdate, ProgressUpdateData, Response, SearchQuery,
};
fn render_beginning_of_html(query: &str) -> String {