From d5293d16b2938b47bd2b44f9f5829bf37c1b195b Mon Sep 17 00:00:00 2001 From: autoantwort Date: Fri, 18 Oct 2024 19:14:07 +0200 Subject: [PATCH] We must also check the fragment before it is percent-decoded, as required by the HTML standard. Fixes https://github.com/lycheeverse/lychee/issues/1467 --- fixtures/fragments/file.html | 2 ++ lychee-lib/src/utils/fragment_checker.rs | 12 ++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/fixtures/fragments/file.html b/fixtures/fragments/file.html index a5fcef56aa..5ff181f737 100644 --- a/fixtures/fragments/file.html +++ b/fixtures/fragments/file.html @@ -7,6 +7,7 @@

+

To start let's run away. @@ -17,6 +18,7 @@

Word

back we go
back we go upper does not work
+ id with percent encoding
back to Upper-ÄÖö
back to öüä encoded
doesn't exist
diff --git a/lychee-lib/src/utils/fragment_checker.rs b/lychee-lib/src/utils/fragment_checker.rs index 50f55f4354..d64ea18215 100644 --- a/lychee-lib/src/utils/fragment_checker.rs +++ b/lychee-lib/src/utils/fragment_checker.rs @@ -47,7 +47,7 @@ impl FragmentChecker { let Some(fragment) = url.fragment() else { return Ok(true); }; - let mut fragment = percent_decode_str(fragment).decode_utf8()?; + let mut fragment_decoded = percent_decode_str(fragment).decode_utf8()?; let url_without_frag = Self::remove_fragment(url.clone()); let file_type = FileType::from(path); @@ -57,15 +57,19 @@ impl FragmentChecker { FileType::Plaintext => return Ok(true), }; if file_type == FileType::Markdown { - fragment = fragment.to_lowercase().into(); + fragment_decoded = fragment_decoded.to_lowercase().into(); } match self.cache.lock().await.entry(url_without_frag) { Entry::Vacant(entry) => { let content = fs::read_to_string(path).await?; let file_frags = extractor(&content); - Ok(entry.insert(file_frags).contains(&fragment as &str)) + let contains_fragment = + file_frags.contains(fragment) || file_frags.contains(&fragment_decoded as &str); + entry.insert(file_frags); + Ok(contains_fragment) } - Entry::Occupied(entry) => Ok(entry.get().contains(&fragment as &str)), + Entry::Occupied(entry) => Ok(entry.get().contains(&fragment as &str) + || entry.get().contains(&fragment_decoded as &str)), } }