fix github header deduplication and upgrade deps

This commit is contained in:
mat 2024-08-21 07:57:12 +00:00
parent cd39774e72
commit 10fa3d82b9
3 changed files with 334 additions and 278 deletions

574
Cargo.lock generated

File diff suppressed because it is too large. (Load Diff)

View File

@@ -22,31 +22,31 @@ axum = { version = "0.7.5", default-features = false, features = [
] }
axum-extra = { version = "0.9.3", features = ["cookie"] }
base64 = "0.22.1"
bytes = "1.6.0"
bytes = "1.7.1"
chrono = "0.4.38"
chrono-tz = { version = "0.9.0", features = ["case-insensitive"] }
eyre = "0.6.12"
fend-core = "1.4.9"
fend-core = "1.5.1"
futures = "0.3.30"
html-escape = "0.2.13"
maud = "0.26.0"
numbat = "1.12.0"
numbat = "1.13.0"
rand = "0.8.5"
regex = "1.10.5"
reqwest = { version = "0.12.5", default-features = false, features = [
regex = "1.10.6"
reqwest = { version = "0.12.7", default-features = false, features = [
"rustls-tls",
"gzip",
"deflate",
"brotli",
] }
scraper = "0.19.0"
serde = { version = "1.0.203", features = ["derive"] }
scraper = "0.20.0"
serde = { version = "1.0.208", features = ["derive"] }
# preserve_order is needed for google images. yippee!
serde_json = { version = "1.0.120", features = ["preserve_order"] }
tokio = { version = "1.38.0", features = ["rt", "macros"] }
serde_json = { version = "1.0.125", features = ["preserve_order"] }
tokio = { version = "1.39.3", features = ["rt", "macros"] }
tokio-stream = "0.1.15"
toml = { version = "0.8.14", default-features = false, features = ["parse"] }
tower = "0.4.13"
toml = { version = "0.8.19", default-features = false, features = ["parse"] }
tower = "0.5.0"
tower-http = "0.5.2"
tracing = "0.1.40"
tracing-subscriber = "0.3.18"

View File

@@ -53,8 +53,22 @@ pub fn parse_response(body: &str) -> Option<PreEscaped<String>> {
.to_string();
let readme_dom = Html::parse_fragment(&readme_html);
let title = if let Some(title_el) = readme_dom.select(&Selector::parse("h1").unwrap()).next() {
// if the readme is wrapped in <article>, remove that
if let Some(article) = readme_dom
.select(&Selector::parse("article").unwrap())
.next()
{
readme_html = article.inner_html().to_string();
}
let title = if let Some(title_el) = readme_dom
// github wraps their h1s in a <div class="">
.select(&Selector::parse("div:has(h1)").unwrap())
.next()
{
// if the readme starts with an h1, remove it
let title_html = title_el.html().trim().to_string();
if readme_html.starts_with(&title_html) {
readme_html = readme_html[title_html.len()..].to_string();
}