Skip to content

Commit

Permalink
Add allow_unmatched_ends reader config option
Browse files Browse the repository at this point in the history
  • Loading branch information
torokati44 committed Jun 26, 2024
1 parent 7f86e57 commit e9b7716
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 6 deletions.
20 changes: 20 additions & 0 deletions src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,25 @@ pub struct Config {
/// [`expand_empty_elements`]: Self::expand_empty_elements
pub check_end_names: bool,

/// Whether unmatched closing tag names should be allowed. Unless enabled,
/// in case of a dangling end tag, the [`Error::IllFormed(UnmatchedEndTag)`]
/// is returned from read methods.
///
/// When set to `true`, it won't check if a closing tag has a corresponding
/// opening tag at all. For example, `<a></a></b>` will be permitted.
///
/// Note that the emitted [`End`] event will not be modified if this is enabled,
/// i.e. it will contain the data of the unmatched end tag.
///
/// Note that setting this to `true` will lead to additional allocations that
/// are needed to store the tag name for an [`End`] event.
///
/// Default: `false`
///
/// [`Error::IllFormed(UnmatchedEndTag)`]: crate::errors::IllFormedError::UnmatchedEndTag
/// [`End`]: crate::events::Event::End
pub allow_unmatched_ends: bool,

/// Whether empty elements should be split into an `Open` and a `Close` event.
///
/// When set to `true`, all [`Empty`] events produced by a self-closing tag
Expand Down Expand Up @@ -192,6 +211,7 @@ impl Default for Config {
Self {
check_comments: false,
check_end_names: true,
allow_unmatched_ends: false,
expand_empty_elements: false,
trim_markup_names_in_closing_tags: true,
trim_text_start: false,
Expand Down
14 changes: 8 additions & 6 deletions src/reader/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,12 +212,14 @@ impl ReaderState {
self.opened_buffer.truncate(start);
}
None => {
// Report error at start of the end tag at `<` character
// -2 for `<` and `>`
self.last_error_offset = self.offset - buf.len() as u64 - 2;
return Err(Error::IllFormed(IllFormedError::UnmatchedEndTag(
decoder.decode(name).unwrap_or_default().into_owned(),
)));
if !self.config.allow_unmatched_ends {
// Report error at start of the end tag at `<` character
// -2 for `<` and `>`
self.last_error_offset = self.offset - buf.len() as u64 - 2;
return Err(Error::IllFormed(IllFormedError::UnmatchedEndTag(
decoder.decode(name).unwrap_or_default().into_owned(),
)));
}
}
}

Expand Down
21 changes: 21 additions & 0 deletions tests/reader-config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,27 @@ mod check_end_names {
);
assert_eq!(reader.read_event().unwrap(), Event::Eof);
}

/// With `allow_unmatched_ends` enabled, a closing tag that has no
/// corresponding opening tag is reported as an ordinary `End` event
/// instead of making `read_event` fail.
#[test]
fn unmatched_end_tags() {
    let mut reader = Reader::from_str("<tag></tag></unmatched>");
    reader.config_mut().allow_unmatched_ends = true;

    // The events the reader must yield, in document order.
    let expected = [
        Event::Start(BytesStart::new("tag")),
        Event::End(BytesEnd::new("tag")),
        // #770: We want to allow this dangling end tag
        Event::End(BytesEnd::new("unmatched")),
        Event::Eof,
    ];

    for event in expected {
        assert_eq!(reader.read_event().unwrap(), event);
    }
}
}
}

Expand Down

0 comments on commit e9b7716

Please sign in to comment.