From 99a28ce8d36bb4d9e89fd90e94e89c11d2c737cc Mon Sep 17 00:00:00 2001
From: Shrecknt <58538423+Shrecknt@users.noreply.github.com>
Date: Sun, 14 Apr 2024 18:42:40 -0700
Subject: [PATCH] add engines google-scholar rightdao stract yep

* add engines google-scholar rightdao stract yep

* add engines

* cleanup
---
Notes (free-form text between the "---" marker and the diffstat is not
part of the commit):

  - google-scholar, rightdao, stract and yep are added to
    default-config.toml with `enabled = false`.
  - google_scholar, rightdao and stract parse an HTML body through
    parse_html_response_with_opts with per-engine CSS selectors.
  - yep deserializes the body as a JSON `(code, response)` pair and
    returns `EngineResponse::new()` unless `code` is "Ok"; each result's
    `snippet` is parsed as HTML and reduced to its text content.
  - In engines! and engine_requests!, the existing answer / post-search
    entries (and `Google` in engine_requests!) are only reordered.

 default-config.toml                  |  5 +++
 src/engines/mod.rs                   | 34 +++++++++------
 src/engines/search.rs                |  4 ++
 src/engines/search/google_scholar.rs | 29 +++++++++++++
 src/engines/search/rightdao.rs       | 23 ++++++++++
 src/engines/search/stract.rs         | 36 ++++++++++++++++
 src/engines/search/yep.rs            | 63 ++++++++++++++++++++++++++++
 7 files changed, 181 insertions(+), 13 deletions(-)
 create mode 100644 src/engines/search/google_scholar.rs
 create mode 100644 src/engines/search/rightdao.rs
 create mode 100644 src/engines/search/stract.rs
 create mode 100644 src/engines/search/yep.rs

diff --git a/default-config.toml b/default-config.toml
index 899cbfa..a74cac3 100644
--- a/default-config.toml
+++ b/default-config.toml
@@ -5,6 +5,11 @@ google = { weight = 1.05 }
 bing = { weight = 1.0 }
 brave = { weight = 1.25 }
 
+google-scholar = { enabled = false, weight = 0.50 }
+rightdao = { enabled = false, weight = 0.10 }
+stract = { enabled = false, weight = 0.15 }
+yep = { enabled = false, weight = 0.10 }
+
 # calculators (give them a high weight so they're always the first thing in autocomplete)
 numbat = { weight = 10 }
 fend = { enabled = false, weight = 10 }
diff --git a/src/engines/mod.rs b/src/engines/mod.rs
index 5d6d21c..3aaaf6d 100644
--- a/src/engines/mod.rs
+++ b/src/engines/mod.rs
@@ -27,39 +27,47 @@ pub mod search;
 engines! {
     // search
     Google = "google",
+    GoogleScholar = "google-scholar",
     Bing = "bing",
     Brave = "brave",
     Marginalia = "marginalia",
+    RightDao = "rightdao",
+    Stract = "stract",
+    Yep = "yep",
     // answer
-    Useragent = "useragent",
-    Ip = "ip",
-    Fend = "fend",
-    Numbat = "numbat",
-    Wikipedia = "wikipedia",
     Dictionary = "dictionary",
+    Fend = "fend",
+    Ip = "ip",
+    Numbat = "numbat",
     Thesaurus = "thesaurus",
     Timezone = "timezone",
+    Useragent = "useragent",
+    Wikipedia = "wikipedia",
     // post-search
-    StackExchange = "stackexchange",
-    GitHub = "github",
     DocsRs = "docs_rs",
+    GitHub = "github",
+    StackExchange = "stackexchange",
 }
 
 engine_requests! {
     // search
-    Google => search::google::request, parse_response,
     Bing => search::bing::request, parse_response,
     Brave => search::brave::request, parse_response,
+    GoogleScholar => search::google_scholar::request, parse_response,
+    Google => search::google::request, parse_response,
     Marginalia => search::marginalia::request, parse_response,
+    RightDao => search::rightdao::request, parse_response,
+    Stract => search::stract::request, parse_response,
+    Yep => search::yep::request, parse_response,
     // answer
-    Useragent => answer::useragent::request, None,
-    Ip => answer::ip::request, None,
-    Fend => answer::fend::request, None,
-    Numbat => answer::numbat::request, None,
-    Wikipedia => answer::wikipedia::request, parse_response,
     Dictionary => answer::dictionary::request, parse_response,
+    Fend => answer::fend::request, None,
+    Ip => answer::ip::request, None,
+    Numbat => answer::numbat::request, None,
     Thesaurus => answer::thesaurus::request, parse_response,
     Timezone => answer::timezone::request, None,
+    Useragent => answer::useragent::request, None,
+    Wikipedia => answer::wikipedia::request, parse_response,
 }
 
 engine_autocomplete_requests! {
diff --git a/src/engines/search.rs b/src/engines/search.rs
index c3e4025..be67d45 100644
--- a/src/engines/search.rs
+++ b/src/engines/search.rs
@@ -1,4 +1,8 @@
 pub mod bing;
 pub mod brave;
 pub mod google;
+pub mod google_scholar;
 pub mod marginalia;
+pub mod rightdao;
+pub mod stract;
+pub mod yep;
diff --git a/src/engines/search/google_scholar.rs b/src/engines/search/google_scholar.rs
new file mode 100644
index 0000000..c230ee9
--- /dev/null
+++ b/src/engines/search/google_scholar.rs
@@ -0,0 +1,29 @@
+use reqwest::Url;
+
+use crate::{
+    engines::{EngineResponse, RequestResponse, CLIENT},
+    parse::{parse_html_response_with_opts, ParseOpts},
+};
+
+pub fn request(query: &str) -> RequestResponse {
+    CLIENT
+        .get(
+            Url::parse_with_params(
+                "https://scholar.google.com/scholar",
+                &[("hl", "en"), ("as_sdt", "0,5"), ("q", query), ("btnG", "")],
+            )
+            .unwrap(),
+        )
+        .into()
+}
+
+pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
+    parse_html_response_with_opts(
+        body,
+        ParseOpts::new()
+            .result("div.gs_r")
+            .title("h3")
+            .href("h3 > a[href]")
+            .description("div.gs_rs"),
+    )
+}
diff --git a/src/engines/search/rightdao.rs b/src/engines/search/rightdao.rs
new file mode 100644
index 0000000..a249af1
--- /dev/null
+++ b/src/engines/search/rightdao.rs
@@ -0,0 +1,23 @@
+use reqwest::Url;
+
+use crate::{
+    engines::{EngineResponse, RequestResponse, CLIENT},
+    parse::{parse_html_response_with_opts, ParseOpts},
+};
+
+pub fn request(query: &str) -> RequestResponse {
+    CLIENT
+        .get(Url::parse_with_params("https://rightdao.com/search", &[("q", query)]).unwrap())
+        .into()
+}
+
+pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
+    parse_html_response_with_opts(
+        body,
+        ParseOpts::new()
+            .result("div.item")
+            .title("div.title")
+            .href("a[href]")
+            .description("div.description"),
+    )
+}
diff --git a/src/engines/search/stract.rs b/src/engines/search/stract.rs
new file mode 100644
index 0000000..2c6ae35
--- /dev/null
+++ b/src/engines/search/stract.rs
@@ -0,0 +1,36 @@
+use reqwest::Url;
+
+use crate::{
+    engines::{EngineResponse, RequestResponse, CLIENT},
+    parse::{parse_html_response_with_opts, ParseOpts},
+};
+
+pub fn request(query: &str) -> RequestResponse {
+    CLIENT
+        .get(
+            Url::parse_with_params(
+                "https://stract.com/search",
+                &[
+                    ("ss", "false"),
+                    // this is not a tracking parameter or token
+                    // this is stract's default value for the search rankings parameter
+                    ("sr", "N4IgNglg1gpgJiAXAbQLoBoRwgZ0rBFDEAIzAHsBjApNAXyA"),
+                    ("q", query),
+                    ("optic", ""),
+                ],
+            )
+            .unwrap(),
+        )
+        .into()
+}
+
+pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
+    parse_html_response_with_opts(
+        body,
+        ParseOpts::new()
+            .result("div.grid.w-full.grid-cols-1.space-y-10.place-self-start > div > div.flex.min-w-0.grow.flex-col")
+            .title("a[title]")
+            .href("a[href]")
+            .description("#snippet-text"),
+    )
+}
diff --git a/src/engines/search/yep.rs b/src/engines/search/yep.rs
new file mode 100644
index 0000000..8ddd8d6
--- /dev/null
+++ b/src/engines/search/yep.rs
@@ -0,0 +1,63 @@
+use reqwest::Url;
+use serde::Deserialize;
+
+use crate::engines::{EngineResponse, EngineSearchResult, RequestResponse, CLIENT};
+
+pub fn request(query: &str) -> RequestResponse {
+    CLIENT
+        .get(
+            Url::parse_with_params(
+                "https://api.yep.com/fs/2/search",
+                &[
+                    ("client", "web"),
+                    ("gl", "all"),
+                    ("no_correct", "true"),
+                    ("q", query),
+                    ("safeSearch", "off"),
+                    ("type", "web"),
+                ],
+            )
+            .unwrap(),
+        )
+        .into()
+}
+
+#[derive(Deserialize, Debug)]
+struct YepApiResponse {
+    pub results: Vec<YepApiResponseResult>,
+}
+
+#[derive(Deserialize, Debug)]
+struct YepApiResponseResult {
+    pub url: String,
+    pub title: String,
+    pub snippet: String,
+}
+
+pub fn parse_response(body: &str) -> eyre::Result<EngineResponse> {
+    let (code, response): (String, YepApiResponse) = serde_json::from_str(body)?;
+    if &code != "Ok" {
+        return Ok(EngineResponse::new());
+    }
+
+    let search_results = response
+        .results
+        .into_iter()
+        .map(|result| {
+            let description_html = scraper::Html::parse_document(&result.snippet);
+            let description = description_html.root_element().text().collect();
+            EngineSearchResult {
+                url: result.url,
+                title: result.title,
+                description,
+            }
+        })
+        .collect();
+
+    Ok(EngineResponse {
+        search_results,
+        featured_snippet: None,
+        answer_html: None,
+        infobox_html: None,
+    })
+}