From e9b771624af87b65bae499f19d2ba4232591eb2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=96R=C3=96K=20Attila?= Date: Wed, 26 Jun 2024 23:27:26 +0200 Subject: [PATCH 1/2] Add `allow_unmatched_ends` reader config option --- src/reader/mod.rs | 20 ++++++++++++++++++++ src/reader/state.rs | 14 ++++++++------ tests/reader-config.rs | 21 +++++++++++++++++++++ 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 843b7032..68853cb3 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -76,6 +76,25 @@ pub struct Config { /// [`expand_empty_elements`]: Self::expand_empty_elements pub check_end_names: bool, + /// Whether unmatched closing tag names should be allowed. Unless enabled, + /// in case of a dangling end tag, the [`Error::IllFormed(UnmatchedEndTag)`] + /// is returned from read methods. + /// + /// When set to `true`, it won't check if a closing tag has a corresponding + /// opening tag at all. For example, `<a></a></b>` will be permitted. + /// + /// Note that the emitted [`End`] event will not be modified if this is enabled, + /// i.e. it will contain the data of the unmatched end tag. + /// + /// Note, that setting this to `true` will lead to additional allocations that + /// are needed to store the tag name for an [`End`] event. + /// + /// Default: `false` + /// + /// [`Error::IllFormed(UnmatchedEndTag)`]: crate::errors::IllFormedError::UnmatchedEndTag + /// [`End`]: crate::events::Event::End + pub allow_unmatched_ends: bool, + /// Whether empty elements should be split into an `Open` and a `Close` event. 
/// /// When set to `true`, all [`Empty`] events produced by a self-closing tag @@ -192,6 +211,7 @@ impl Default for Config { Self { check_comments: false, check_end_names: true, + allow_unmatched_ends: false, expand_empty_elements: false, trim_markup_names_in_closing_tags: true, trim_text_start: false, diff --git a/src/reader/state.rs b/src/reader/state.rs index 9b1117e2..2f3e8512 100644 --- a/src/reader/state.rs +++ b/src/reader/state.rs @@ -212,12 +212,14 @@ impl ReaderState { self.opened_buffer.truncate(start); } None => { - // Report error at start of the end tag at `<` character - // -2 for `<` and `>` - self.last_error_offset = self.offset - buf.len() as u64 - 2; - return Err(Error::IllFormed(IllFormedError::UnmatchedEndTag( - decoder.decode(name).unwrap_or_default().into_owned(), - ))); + if !self.config.allow_unmatched_ends { + // Report error at start of the end tag at `<` character + // -2 for `<` and `>` + self.last_error_offset = self.offset - buf.len() as u64 - 2; + return Err(Error::IllFormed(IllFormedError::UnmatchedEndTag( + decoder.decode(name).unwrap_or_default().into_owned(), + ))); + } } } diff --git a/tests/reader-config.rs b/tests/reader-config.rs index fd968ce3..bd2465e3 100644 --- a/tests/reader-config.rs +++ b/tests/reader-config.rs @@ -344,6 +344,27 @@ mod check_end_names { ); assert_eq!(reader.read_event().unwrap(), Event::Eof); } + + #[test] + fn unmatched_end_tags() { + let mut reader = Reader::from_str("<tag></tag></unmatched>"); + reader.config_mut().allow_unmatched_ends = true; + + assert_eq!( + reader.read_event().unwrap(), + Event::Start(BytesStart::new("tag")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::End(BytesEnd::new("tag")) + ); + // #770: We want to allow this + assert_eq!( + reader.read_event().unwrap(), + Event::End(BytesEnd::new("unmatched")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } } } From ab18642981c1ec8861de71dae9d4997a002882ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=96R=C3=96K=20Attila?= Date: 
Thu, 27 Jun 2024 00:15:37 +0200 Subject: [PATCH 2/2] Add changelog entry for #772 --- Changelog.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Changelog.md b/Changelog.md index 28823712..ab8548b0 100644 --- a/Changelog.md +++ b/Changelog.md @@ -15,10 +15,14 @@ ### New Features +- [#772]: Add `reader::Config::allow_unmatched_ends` to permit dangling end tags + ### Bug Fixes ### Misc Changes +[#772]: https://github.com/tafia/quick-xml/pull/772 + ## 0.34.0 -- 2024-06-25