diff --git a/Cargo.lock b/Cargo.lock index aa48e82..e75cd04 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -39,6 +39,19 @@ dependencies = [ "memchr", ] +[[package]] +name = "ammonia" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64e6d1c7838db705c9b756557ee27c384ce695a1c51a6fe528784cb1c6840170" +dependencies = [ + "html5ever", + "maplit", + "once_cell", + "tendril", + "url", +] + [[package]] name = "anyhow" version = "1.0.75" @@ -753,6 +766,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" +[[package]] +name = "maplit" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" + [[package]] name = "markup5ever" version = "0.11.0" @@ -792,6 +811,7 @@ checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" name = "metasearch2" version = "0.1.0" dependencies = [ + "ammonia", "anyhow", "async-stream", "axum", diff --git a/Cargo.toml b/Cargo.toml index 7d72f80..cb98f83 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +ammonia = "3.3.0" anyhow = "1.0.75" async-stream = "0.3.5" axum = { version = "0.7.2", features = ["http2"] } diff --git a/src/engines/answer/wikipedia.rs b/src/engines/answer/wikipedia.rs index 0526e29..a8a67d6 100644 --- a/src/engines/answer/wikipedia.rs +++ b/src/engines/answer/wikipedia.rs @@ -6,7 +6,6 @@ use serde::Deserialize; use crate::engines::{EngineResponse, CLIENT}; pub fn request(query: &str) -> reqwest::RequestBuilder { - println!("request wikipedia"); CLIENT .get( Url::parse_with_params( diff --git a/src/engines/mod.rs b/src/engines/mod.rs index 660ab8c..6b42353 100644 --- a/src/engines/mod.rs +++ b/src/engines/mod.rs @@ -28,6 +28,7 @@ pub enum Engine { Wikipedia, // post-search StackOverflow, + GitHub, } impl Engine { @@ -41,6 +42,7 @@ impl Engine { Engine::Calc, Engine::Wikipedia, Engine::StackOverflow, + Engine::GitHub, ] } @@ -54,6 +56,7 @@ impl Engine { Engine::Calc => "calc", Engine::Wikipedia => "wikipedia", Engine::StackOverflow => "stackoverflow", + Engine::GitHub => "github", } } @@ -107,6 +110,7 @@ impl Engine { pub fn postsearch_request(&self, response: &Response) -> Option { match self { Engine::StackOverflow => postsearch::stackoverflow::request(response), + Engine::GitHub => postsearch::github::request(response), _ => None, } } @@ -114,6 +118,7 @@ impl Engine { pub fn postsearch_parse_response(&self, body: &str) -> Option { match self { Engine::StackOverflow => postsearch::stackoverflow::parse_response(body), + Engine::GitHub => postsearch::github::parse_response(body), _ => None, } } diff --git a/src/engines/postsearch.rs b/src/engines/postsearch.rs index 7ef8cf4..8f35944 100644 --- a/src/engines/postsearch.rs +++ b/src/engines/postsearch.rs @@ -2,4 +2,5 @@ //! results. They can only show stuff in infoboxes and don't get requested if //! an infobox was added by another earlier engine. +pub mod github; pub mod stackoverflow; diff --git a/src/engines/postsearch/github.rs b/src/engines/postsearch/github.rs new file mode 100644 index 0000000..40073ef --- /dev/null +++ b/src/engines/postsearch/github.rs @@ -0,0 +1,56 @@ +use reqwest::Url; +use scraper::{Html, Selector}; + +use crate::engines::{Response, CLIENT}; + +pub fn request(response: &Response) -> Option { + for search_result in response.search_results.iter().take(8) { + if search_result.url.starts_with("https://github.com/") { + return Some(CLIENT.get(search_result.url.as_str()).header( + "User-Agent", + "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0", + )); + } + } + + None +} + +pub fn parse_response(body: &str) -> Option { + let dom = Html::parse_document(body); + + let url = Url::join( + &Url::parse("https://github.com").unwrap(), + dom.select(&Selector::parse("main #repository-container-header a").unwrap()) + .next()? + .value() + .attr("href")?, + ) + .ok()? + .to_string(); + let readme = dom.select(&Selector::parse("article").unwrap()).next()?; + let readme_html = readme.inner_html().trim().to_string(); + + let mut readme_html = ammonia::Builder::default() + .link_rel(None) + .url_relative(ammonia::UrlRelative::RewriteWithBase( + Url::parse("https://github.com").unwrap(), + )) + .clean(&readme_html) + .to_string(); + + let readme_dom = Html::parse_fragment(&readme_html); + let title_el = readme_dom.select(&Selector::parse("h1").unwrap()).next()?; + let title_html = title_el.html().trim().to_string(); + if readme_html.starts_with(&title_html) { + readme_html = readme_html[title_html.len()..].to_string(); + } + let title = title_el.text().collect::(); + + Some(format!( + r#"

{title}

+
{readme_html}
"#, + url = html_escape::encode_quoted_attribute(&url), + title = html_escape::encode_text(&title), + )) +} diff --git a/src/engines/postsearch/stackoverflow.rs b/src/engines/postsearch/stackoverflow.rs index 4ce3aa9..63c6e33 100644 --- a/src/engines/postsearch/stackoverflow.rs +++ b/src/engines/postsearch/stackoverflow.rs @@ -49,7 +49,7 @@ pub fn parse_response(body: &str) -> Option { let url = format!("{url}#{answer_id}"); Some(format!( - r#"

{title}

+ r#"

{title}

{answer_html}
"#, url = html_escape::encode_quoted_attribute(&url.to_string()), title = html_escape::encode_text(&title), diff --git a/src/web/assets/script.js b/src/web/assets/script.js index 7406dc2..fe655d8 100644 --- a/src/web/assets/script.js +++ b/src/web/assets/script.js @@ -22,3 +22,11 @@ searchInputEl.addEventListener("input", async (e) => { datalistEl.appendChild(optionEl); }); }); + +// if the user starts typing but they don't have focus on the input, focus it +document.addEventListener("keydown", (e) => { + // must be a letter or number + if (e.key.match(/^[a-z0-9]$/i) && !searchInputEl.matches(":focus")) { + searchInputEl.focus(); + } +}); diff --git a/src/web/search.rs b/src/web/search.rs index ff3c6da..9a8ea47 100644 --- a/src/web/search.rs +++ b/src/web/search.rs @@ -11,7 +11,7 @@ use bytes::Bytes; use html_escape::{encode_text, encode_unquoted_attribute}; use crate::engines::{ - self, Engine, EngineProgressUpdate, ProgressUpdate, ProgressUpdateData, Response, SearchQuery, + self, Engine, EngineProgressUpdate, ProgressUpdateData, Response, SearchQuery, }; fn render_beginning_of_html(query: &str) -> String {