From 134cba4061e3fe4bd100d6b9cde1d7f907245b83 Mon Sep 17 00:00:00 2001 From: mat Date: Mon, 15 Jan 2024 01:20:14 -0600 Subject: [PATCH] github stopped doing ssr for readmes --- Cargo.lock | 2 +- Cargo.toml | 3 ++- src/engines/postsearch/github.rs | 16 ++++++++++++++-- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 002bbad..c085a6c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -421,7 +421,7 @@ dependencies = [ [[package]] name = "fend-core" version = "1.4.1" -source = "git+https://github.com/mat-1/fend?branch=main#5c48b97070bbb6b1ec52400b2547e6f369cd78c7" +source = "git+https://github.com/printfn/fend?branch=main#5e5080231a5c22814ff778d3a1b4577467dd2ccd" [[package]] name = "flate2" diff --git a/Cargo.toml b/Cargo.toml index d4dce90..2479600 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,4 +40,5 @@ url = "2.5.0" urlencoding = "2.1.3" [patch.crates-io] -fend-core = { git = "https://github.com/mat-1/fend", branch = "main" } +# temporarily using git version of fend-core since it adds codepoint to character conversion +fend-core = { git = "https://github.com/printfn/fend", branch = "main" } diff --git a/src/engines/postsearch/github.rs b/src/engines/postsearch/github.rs index e9d144d..d631592 100644 --- a/src/engines/postsearch/github.rs +++ b/src/engines/postsearch/github.rs @@ -26,8 +26,20 @@ pub fn parse_response(body: &str) -> Option { .attr("href")?; let url = format!("https://github.com{url_relative}"); - let readme = dom.select(&Selector::parse("article").unwrap()).next()?; - let readme_html = readme.inner_html().trim().to_string(); + let embedded_data_script = dom + .select(&Selector::parse("script[data-target='react-partial.embeddedData']").unwrap()) + .last()? + .inner_html(); + let embedded_data = serde_json::from_str::<serde_json::Value>(&embedded_data_script).ok()?; + let readme_html = embedded_data + .get("props")? + .get("initialPayload")? + .get("overview")? + .get("overviewFiles")? + .as_array()? + .first()? + .get("richText")? 
+ .as_str()?; let mut readme_html = ammonia::Builder::default() .link_rel(None)