more more quick answers

* notepad

* fix config

* mdn docs

* search bar fix

* remove unnecessary check

* notepad fix

* make config available to all stages of request

* make config more consistent

* minecraft wiki infobox

* cleanup

* engine_list_separator fix

* are you happy now mta

* change Arc<Config> to &Config in places

---------

Co-authored-by: mat <git@matdoes.dev>
This commit is contained in:
Shrecknt 2024-04-15 19:49:40 -07:00 committed by GitHub
parent 39aeae7a69
commit 569922aab7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 208 additions and 32 deletions

View File

@ -1,11 +1,13 @@
bind = "0.0.0.0:28019"
engine_list_separator = false
[engines]
google = { weight = 1.05 }
bing = { weight = 1.0 }
brave = { weight = 1.25 }
google-scholar = { enabled = false, weight = 0.50 }
google_scholar = { enabled = false, weight = 0.50 }
rightdao = { enabled = false, weight = 0.10 }
stract = { enabled = false, weight = 0.15 }
yep = { enabled = false, weight = 0.10 }
@ -17,3 +19,8 @@ fend = { enabled = false, weight = 10 }
[engines.marginalia]
args = { profile = "corpo", js = "default", adtech = "default" }
weight = 0.15
[engines.mdn]
# the number of sections of text to display
# 1 is just the summary and 0 is no limit
max_sections = 1

View File

@ -6,9 +6,11 @@ use tracing::info;
use crate::engines::Engine;
#[derive(Deserialize)]
#[derive(Deserialize, Debug)]
pub struct Config {
pub bind: SocketAddr,
#[serde(default)]
pub engine_list_separator: Option<bool>,
pub engines: EnginesConfig,
}
@ -34,6 +36,8 @@ impl Config {
// use the default for something.
pub fn update(&mut self, other: Self) {
self.bind = other.bind;
self.engine_list_separator = self.engine_list_separator.or(other.engine_list_separator);
assert_ne!(self.engine_list_separator, None);
for (key, value) in other.engines.map {
if let Some(existing) = self.engines.map.get_mut(&key) {
existing.update(value);
@ -44,7 +48,7 @@ impl Config {
}
}
#[derive(Deserialize)]
#[derive(Deserialize, Debug)]
pub struct EnginesConfig {
#[serde(flatten)]
pub map: HashMap<Engine, DefaultableEngineConfig>,
@ -76,7 +80,7 @@ impl EnginesConfig {
}
}
#[derive(Deserialize, Clone)]
#[derive(Deserialize, Clone, Debug)]
#[serde(untagged)]
pub enum DefaultableEngineConfig {
Boolean(bool),
@ -99,7 +103,7 @@ impl Default for DefaultableEngineConfig {
}
}
#[derive(Deserialize, Clone)]
#[derive(Deserialize, Clone, Debug)]
pub struct FullEngineConfig {
#[serde(default = "default_true")]
pub enabled: bool,

View File

@ -50,7 +50,9 @@ pub struct WiktionaryDefinition {
pub examples: Vec<String>,
}
pub fn parse_response(HttpResponse { res, body }: &HttpResponse) -> eyre::Result<EngineResponse> {
pub fn parse_response(
HttpResponse { res, body, .. }: &HttpResponse,
) -> eyre::Result<EngineResponse> {
let url = res.url();
let Ok(res) = serde_json::from_str::<WiktionaryResponse>(body) else {

View File

@ -7,7 +7,8 @@ pub fn request(query: &SearchQuery) -> EngineResponse {
return EngineResponse::new();
}
EngineResponse::answer_html(
r#"<div contenteditable id='notepad' placeholder='Notes' style='width:100%;color:white;outline:none;min-height:4em;font-size:12px;'></div>"#.to_string()
)
// A bare `contenteditable` allows pasting styled content, which is undesired behavior.
// The `contenteditable="plaintext-only"` attribute would prevent that, but it currently
// only works on Chrome. This should be updated once the attribute becomes available in
// more browsers.
EngineResponse::answer_html(r#"<div contenteditable class="answer-notepad"></div>"#.to_string())
}

View File

@ -28,7 +28,7 @@ pub mod search;
engines! {
// search
Google = "google",
GoogleScholar = "google-scholar",
GoogleScholar = "google_scholar",
Bing = "bing",
Brave = "brave",
Marginalia = "marginalia",
@ -49,6 +49,7 @@ engines! {
DocsRs = "docs_rs",
GitHub = "github",
Mdn = "mdn",
MinecraftWiki = "minecraft_wiki",
StackExchange = "stackexchange",
}
@ -84,6 +85,7 @@ engine_postsearch_requests! {
DocsRs => postsearch::docs_rs::request, parse_response,
GitHub => postsearch::github::request, parse_response,
Mdn => postsearch::mdn::request, parse_response,
MinecraftWiki => postsearch::minecraft_wiki::request, parse_response,
StackExchange => postsearch::stackexchange::request, parse_response,
}
@ -154,6 +156,7 @@ impl From<Vec<String>> for RequestAutocompleteResponse {
pub struct HttpResponse {
pub res: reqwest::Response,
pub body: String,
pub config: Arc<Config>,
}
impl<'a> From<&'a HttpResponse> for &'a str {
@ -302,7 +305,11 @@ pub async fn search(
start_time,
))?;
let http_response = HttpResponse { res, body };
let http_response = HttpResponse {
res,
body,
config: query.config.clone(),
};
let response = match engine.parse_response(&http_response) {
Ok(response) => response,
@ -339,7 +346,7 @@ pub async fn search(
join_all(response_futures).await.into_iter().collect();
let responses = responses_result?;
let response = merge_engine_responses(&query.config, responses);
let response = merge_engine_responses(query.config.clone(), responses);
let has_infobox = response.infobox.is_some();
@ -368,7 +375,11 @@ pub async fn search(
}
let body = String::from_utf8_lossy(&body_bytes).to_string();
let http_response = HttpResponse { res, body };
let http_response = HttpResponse {
res,
body,
config: query.config.clone(),
};
engine.postsearch_parse_response(&http_response)
}
Err(e) => {
@ -464,6 +475,7 @@ pub struct Response {
pub featured_snippet: Option<FeaturedSnippet>,
pub answer: Option<Answer>,
pub infobox: Option<Infobox>,
pub config: Arc<Config>,
}
#[derive(Debug, Clone)]
@ -495,7 +507,10 @@ pub struct Infobox {
pub engine: Engine,
}
fn merge_engine_responses(config: &Config, responses: HashMap<Engine, EngineResponse>) -> Response {
fn merge_engine_responses(
config: Arc<Config>,
responses: HashMap<Engine, EngineResponse>,
) -> Response {
let mut search_results: Vec<SearchResult> = Vec::new();
let mut featured_snippet: Option<FeaturedSnippet> = None;
let mut answer: Option<Answer> = None;
@ -596,6 +611,7 @@ fn merge_engine_responses(config: &Config, responses: HashMap<Engine, EngineResp
featured_snippet,
answer,
infobox,
config,
}
}

View File

@ -5,4 +5,5 @@
pub mod docs_rs;
pub mod github;
pub mod mdn;
pub mod minecraft_wiki;
pub mod stackexchange;

View File

@ -12,7 +12,7 @@ pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
None
}
pub fn parse_response(HttpResponse { res, body }: &HttpResponse) -> Option<String> {
pub fn parse_response(HttpResponse { res, body, .. }: &HttpResponse) -> Option<String> {
let url = res.url().clone();
let dom = Html::parse_document(body);

View File

@ -1,6 +1,13 @@
use scraper::{Html, Selector};
use serde::Deserialize;
use tracing::error;
use crate::engines::{HttpResponse, Response, CLIENT};
use crate::engines::{Engine, HttpResponse, Response, CLIENT};
/// Engine-specific settings for the MDN infobox, deserialized from the extra
/// keys of the `[engines.mdn]` config table.
#[derive(Deserialize)]
pub struct MdnConfig {
/// The number of sections of text to display.
/// 1 is just the summary and 0 is no limit.
pub max_sections: usize,
}
pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
for search_result in response.search_results.iter().take(8) {
@ -15,7 +22,16 @@ pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
None
}
pub fn parse_response(HttpResponse { res, body }: &HttpResponse) -> Option<String> {
pub fn parse_response(HttpResponse { res, body, config }: &HttpResponse) -> Option<String> {
let config_toml = config.engines.get(Engine::Mdn).extra.clone();
let config: MdnConfig = match toml::Value::Table(config_toml).try_into() {
Ok(args) => args,
Err(err) => {
error!("Failed to parse Mdn config: {err}");
return None;
}
};
let url = res.url().clone();
let dom = Html::parse_document(body);
@ -30,11 +46,18 @@ pub fn parse_response(HttpResponse { res, body }: &HttpResponse) -> Option<Strin
let doc_query = Selector::parse(".section-content").unwrap();
let max_sections = if config.max_sections == 0 {
usize::MAX
} else {
config.max_sections
};
let doc_html = dom
.select(&doc_query)
.next()
.map(|doc| doc.inner_html())
.unwrap_or_default();
.take(max_sections)
.collect::<Vec<_>>()
.join("<br />");
let doc_html = ammonia::Builder::default()
.link_rel(None)

View File

@ -0,0 +1,88 @@
use scraper::{ElementRef, Html, Selector};
use crate::engines::{HttpResponse, Response, CLIENT};
/// Builds the follow-up request used to populate the Minecraft Wiki infobox.
///
/// Only the first 8 search results are considered. The first one whose URL
/// starts with `https://minecraft.wiki/w/` is fetched with a GET request;
/// `None` is returned when no such result exists.
pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
    response
        .search_results
        .iter()
        .take(8)
        .find(|result| result.url.starts_with("https://minecraft.wiki/w/"))
        .map(|result| CLIENT.get(result.url.as_str()))
}
/// Converts a fetched Minecraft Wiki page into sanitized infobox HTML.
///
/// The page title is read from `#firstHeading` and the article body from the
/// first `.mw-parser-output` element, with everything from the "Gallery"
/// heading onwards removed. The body is sanitized with `ammonia`, and relative
/// URLs are rewritten against the page URL.
///
/// Returns `None` when the page has no `#firstHeading` element. A missing
/// `.mw-parser-output` element yields an empty article body rather than `None`.
pub fn parse_response(HttpResponse { res, body, .. }: &HttpResponse) -> Option<String> {
    let url = res.url().clone();
    let dom = Html::parse_document(body);

    let heading_selector = Selector::parse("#firstHeading").unwrap();
    let heading = dom.select(&heading_selector).next()?;
    let page_title = heading.text().collect::<String>().trim().to_string();

    let content_selector = Selector::parse(".mw-parser-output").unwrap();
    let raw_html = match dom.select(&content_selector).next() {
        Some(content) => strip_gallery(content).join(""),
        None => String::new(),
    };

    // Keep only the classes the stylesheet relies on; everything else is
    // handled by ammonia's default allow-list.
    let doc_html = ammonia::Builder::default()
        .link_rel(None)
        .add_allowed_classes("div", ["notaninfobox", "mcw-mainpage-icon"])
        .add_allowed_classes("pre", ["noexcerpt", "navigation-not-searchable"])
        .url_relative(ammonia::UrlRelative::RewriteWithBase(url.clone()))
        .clean(&raw_html)
        .to_string();

    let escaped_url = html_escape::encode_quoted_attribute(url.as_str());
    let escaped_title = html_escape::encode_safe(&page_title);
    let title_html = format!(
        r#"<h2><a href="{url}">{title}</a></h2>"#,
        url = escaped_url,
        title = escaped_title,
    );

    Some(format!(
        r#"{title_html}<div class="infobox-minecraft_wiki-article">{doc_html}</div>"#
    ))
}
/// Serializes the direct children of `doc`, cutting the output off at the
/// gallery section.
///
/// Children are emitted in document order until the first element whose first
/// child is itself an element with text exactly equal to `Gallery`. That
/// element and every child after it are dropped. Non-element children (such
/// as text nodes) are kept in the sequence but serialize to an empty string.
fn strip_gallery(doc: ElementRef) -> Vec<String> {
    doc.children()
        .take_while(|node| {
            let starts_gallery = ElementRef::wrap(*node)
                .and_then(|element| element.first_child())
                .and_then(ElementRef::wrap)
                .map_or(false, |heading| {
                    heading.text().collect::<String>() == "Gallery"
                });
            !starts_gallery
        })
        .map(|node| match ElementRef::wrap(node) {
            Some(element) => element.html(),
            None => String::new(),
        })
        .collect()
}

View File

@ -113,7 +113,8 @@ document.addEventListener("keydown", (e) => {
const focusedEl = document.querySelector(":focus");
if (
focusedEl &&
(focusedEl.tagName == "input" ||
(focusedEl.tagName.toLowerCase() == "input" ||
focusedEl.tagName.toLowerCase() == "textarea" ||
focusedEl.getAttribute("contenteditable") !== null)
)
return;

View File

@ -292,6 +292,18 @@ h3.answer-thesaurus-category-title {
.answer-thesaurus-list a {
text-decoration: underline;
}
/* Editable scratch area rendered by the notepad answer
   (`<div contenteditable class="answer-notepad">`). */
.answer-notepad {
  width: calc( 100% - 4px );
  height: fit-content;
  /* `show` is not a valid `overflow-y` keyword, so the declaration was being
     dropped; `visible` is the valid keyword and is also the initial value */
  overflow-y: visible;
  background-color: transparent;
  color: white;
  border: none;
  outline: none;
  min-height: 4em;
  font-size: 12px;
  resize: none;
}
/* infobox */
.infobox {
@ -353,3 +365,12 @@ h3.answer-thesaurus-category-title {
.postsearch-infobox p {
margin-bottom: 1em;
}
/* Hide `notaninfobox` elements that are direct children of the Minecraft Wiki article */
.infobox-minecraft_wiki-article > .notaninfobox {
display: none !important;
}
/* Hide anything carrying the `noexcerpt` or `navigation-not-searchable` class */
.noexcerpt, .navigation-not-searchable {
display: none !important;
}
/* Render `mcw-mainpage-icon` elements as inline blocks */
.mcw-mainpage-icon {
display: inline-block;
}

View File

@ -45,18 +45,29 @@ fn render_end_of_html() -> String {
r"</main></div></body></html>".to_string()
}
fn render_engine_list(engines: &[engines::Engine]) -> String {
fn render_engine_list(engines: &[engines::Engine], config: &Config) -> String {
let mut html = String::new();
let mut first_iter = true;
for engine in engines {
if config.engine_list_separator.unwrap() && !first_iter {
html.push_str(" &middot; ");
}
first_iter = false;
let raw_engine_id = &engine.id();
let engine_id = if config.engine_list_separator.unwrap() {
raw_engine_id.replace('_', " ")
} else {
raw_engine_id.to_string()
};
html.push_str(&format!(
r#"<span class="engine-list-item">{engine}</span>"#,
engine = encode_text(&engine.id())
engine = encode_text(&engine_id)
));
}
format!(r#"<div class="engine-list">{html}</div>"#)
}
fn render_search_result(result: &engines::SearchResult) -> String {
fn render_search_result(result: &engines::SearchResult, config: &Config) -> String {
format!(
r#"<div class="search-result">
<a class="search-result-anchor" rel="noreferrer" href="{url_attr}">
@ -71,11 +82,12 @@ fn render_search_result(result: &engines::SearchResult) -> String {
url = encode_text(&result.url),
title = encode_text(&result.title),
desc = encode_text(&result.description),
engines_html = render_engine_list(&result.engines.iter().copied().collect::<Vec<_>>())
engines_html =
render_engine_list(&result.engines.iter().copied().collect::<Vec<_>>(), config)
)
}
fn render_featured_snippet(featured_snippet: &engines::FeaturedSnippet) -> String {
fn render_featured_snippet(featured_snippet: &engines::FeaturedSnippet, config: &Config) -> String {
format!(
r#"<div class="featured-snippet">
<p class="search-result-description">{desc}</p>
@ -90,7 +102,7 @@ fn render_featured_snippet(featured_snippet: &engines::FeaturedSnippet) -> Strin
url_attr = encode_unquoted_attribute(&featured_snippet.url),
url = encode_text(&featured_snippet.url),
title = encode_text(&featured_snippet.title),
engines_html = render_engine_list(&[featured_snippet.engine])
engines_html = render_engine_list(&[featured_snippet.engine], config)
)
}
@ -100,21 +112,21 @@ fn render_results(response: Response) -> String {
html.push_str(&format!(
r#"<div class="infobox">{infobox_html}{engines_html}</div>"#,
infobox_html = &infobox.html,
engines_html = render_engine_list(&[infobox.engine])
engines_html = render_engine_list(&[infobox.engine], &response.config)
));
}
if let Some(answer) = &response.answer {
html.push_str(&format!(
r#"<div class="answer">{answer_html}{engines_html}</div>"#,
answer_html = &answer.html,
engines_html = render_engine_list(&[answer.engine])
engines_html = render_engine_list(&[answer.engine], &response.config)
));
}
if let Some(featured_snippet) = &response.featured_snippet {
html.push_str(&render_featured_snippet(featured_snippet));
html.push_str(&render_featured_snippet(featured_snippet, &response.config));
}
for result in &response.search_results {
html.push_str(&render_search_result(result));
html.push_str(&render_search_result(result, &response.config));
}
if response.infobox.is_none()
@ -187,7 +199,7 @@ pub async fn route(
|| addr.ip().to_string(),
|ip| ip.to_str().unwrap_or_default().to_string(),
),
config,
config: config.clone(),
};
let s = stream! {
@ -230,7 +242,7 @@ pub async fn route(
third_part.push_str(&format!(
r#"<div class="infobox postsearch-infobox">{infobox_html}{engines_html}</div>"#,
infobox_html = &infobox.html,
engines_html = render_engine_list(&[infobox.engine])
engines_html = render_engine_list(&[infobox.engine], &config)
));
}
}