2023-12-21 04:18:43 +00:00
|
|
|
use scraper::{Html, Selector};
|
2023-12-22 06:19:22 +00:00
|
|
|
use url::Url;
|
2023-12-21 04:18:43 +00:00
|
|
|
|
2023-12-21 09:45:59 +00:00
|
|
|
use crate::engines::{answer::regex, Response, CLIENT};
|
2023-12-21 04:18:43 +00:00
|
|
|
|
|
|
|
pub fn request(response: &Response) -> Option<reqwest::RequestBuilder> {
|
|
|
|
for search_result in response.search_results.iter().take(8) {
|
2023-12-21 09:45:59 +00:00
|
|
|
if regex!(r"^https:\/\/(stackoverflow\.com|serverfault\.com|superuser\.com|\w{1,}\.stackexchange\.com)\/questions\/\d+")
|
|
|
|
.is_match(&search_result.url)
|
2023-12-21 04:18:43 +00:00
|
|
|
{
|
2024-01-03 07:48:02 +00:00
|
|
|
return Some(CLIENT.get(search_result.url.as_str()));
|
2023-12-21 04:18:43 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
None
|
|
|
|
}
|
|
|
|
|
2023-12-22 00:43:29 +00:00
|
|
|
pub fn parse_response(body: &str, _url: Url) -> Option<String> {
|
2023-12-21 04:18:43 +00:00
|
|
|
let dom = Html::parse_document(body);
|
|
|
|
|
|
|
|
let title = dom
|
|
|
|
.select(&Selector::parse("h1").unwrap())
|
|
|
|
.next()?
|
|
|
|
.text()
|
|
|
|
.collect::<String>();
|
2023-12-21 09:45:59 +00:00
|
|
|
|
|
|
|
let base_url = dom
|
|
|
|
.select(&Selector::parse("link[rel=canonical]").unwrap())
|
|
|
|
.next()?
|
|
|
|
.value()
|
|
|
|
.attr("href")?;
|
2023-12-21 04:18:43 +00:00
|
|
|
let url = Url::join(
|
2023-12-21 09:45:59 +00:00
|
|
|
&Url::parse(base_url).unwrap(),
|
2023-12-21 04:18:43 +00:00
|
|
|
dom.select(&Selector::parse(".question-hyperlink").unwrap())
|
|
|
|
.next()?
|
|
|
|
.value()
|
|
|
|
.attr("href")?,
|
|
|
|
)
|
|
|
|
.ok()?;
|
|
|
|
|
|
|
|
let answer_query = Selector::parse("div.answer.accepted-answer").unwrap();
|
|
|
|
|
|
|
|
let answer = dom.select(&answer_query).next()?;
|
|
|
|
let answer_id = answer.value().attr("data-answerid")?;
|
|
|
|
let answer_html = answer
|
|
|
|
.select(&Selector::parse("div.answercell > div.js-post-body").unwrap())
|
|
|
|
.next()?
|
|
|
|
.html()
|
|
|
|
.to_string();
|
|
|
|
|
|
|
|
let url = format!("{url}#{answer_id}");
|
|
|
|
|
|
|
|
Some(format!(
|
2023-12-21 05:17:39 +00:00
|
|
|
r#"<a href="{url}"><h2>{title}</h2></a>
|
2023-12-21 09:45:59 +00:00
|
|
|
<div class="infobox-stackexchange-answer">{answer_html}</div>"#,
|
2023-12-21 04:18:43 +00:00
|
|
|
url = html_escape::encode_quoted_attribute(&url.to_string()),
|
|
|
|
title = html_escape::encode_text(&title),
|
|
|
|
))
|
|
|
|
}
|