Skip to content

Commit

Permalink
Merge pull request #767 from Mingun/move-to-integration
Browse files Browse the repository at this point in the history
Convert some unit tests to integration tests
  • Loading branch information
Mingun authored Jun 23, 2024
2 parents 2659775 + a24ed89 commit 649f3d8
Show file tree
Hide file tree
Showing 11 changed files with 812 additions and 827 deletions.
54 changes: 20 additions & 34 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2165,8 +2165,9 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
replace(&mut self.lookahead, self.reader.next())
}

/// Returns `true` when next event is not a text event in any form.
#[inline(always)]
const fn need_trim_end(&self) -> bool {
const fn current_event_is_last_text(&self) -> bool {
// If next event is a text or CDATA, we should not trim trailing spaces
!matches!(
self.lookahead,
Expand All @@ -2182,43 +2183,27 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
/// [`CData`]: PayloadEvent::CData
fn drain_text(&mut self, mut result: Cow<'i, str>) -> Result<DeEvent<'i>, DeError> {
loop {
match self.lookahead {
Ok(PayloadEvent::Text(_) | PayloadEvent::CData(_)) => {
let text = self.next_text()?;

let mut s = result.into_owned();
s += &text;
result = Cow::Owned(s);
}
_ => break,
if self.current_event_is_last_text() {
break;
}
}
Ok(DeEvent::Text(Text { text: result }))
}

/// Read one text event, panics if current event is not a text event
///
/// |Event |XML |Handling
/// |-----------------------|---------------------------|----------------------------------------
/// |[`PayloadEvent::Start`]|`<tag>...</tag>` |Possible panic (unreachable)
/// |[`PayloadEvent::End`] |`</any-tag>` |Possible panic (unreachable)
/// |[`PayloadEvent::Text`] |`text content` |Unescapes `text content` and returns it
/// |[`PayloadEvent::CData`]|`<![CDATA[cdata content]]>`|Returns `cdata content` unchanged
/// |[`PayloadEvent::Eof`] | |Possible panic (unreachable)
#[inline(always)]
fn next_text(&mut self) -> Result<Cow<'i, str>, DeError> {
match self.next_impl()? {
PayloadEvent::Text(mut e) => {
if self.need_trim_end() {
e.inplace_trim_end();
match self.next_impl()? {
PayloadEvent::Text(mut e) => {
if self.current_event_is_last_text() {
// FIXME: trimming should be done after the text is decoded, but currently it is done before
e.inplace_trim_end();
}
result
.to_mut()
.push_str(&e.unescape_with(|entity| self.entity_resolver.resolve(entity))?);
}
Ok(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
}
PayloadEvent::CData(e) => Ok(e.decode()?),
PayloadEvent::CData(e) => result.to_mut().push_str(&e.decode()?),

// SAFETY: this method is called only when we peeked Text or CData
_ => unreachable!("Only `Text` and `CData` events can come here"),
// SAFETY: current_event_is_last_text checks that event is Text or CData
_ => unreachable!("Only `Text` and `CData` events can come here"),
}
}
Ok(DeEvent::Text(Text { text: result }))
}

/// Return an input-borrowing event.
Expand All @@ -2228,7 +2213,8 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
PayloadEvent::Start(e) => Ok(DeEvent::Start(e)),
PayloadEvent::End(e) => Ok(DeEvent::End(e)),
PayloadEvent::Text(mut e) => {
if self.need_trim_end() && e.inplace_trim_end() {
if self.current_event_is_last_text() && e.inplace_trim_end() {
// FIXME: trimming should be done after the text is decoded, but currently it is done before
continue;
}
self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
Expand Down
8 changes: 1 addition & 7 deletions src/reader/async_tokio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
#[cfg(test)]
mod test {
use super::TokioAdapter;
use crate::reader::test::{check, small_buffers};
use crate::reader::test::check;

check!(
#[tokio::test]
Expand All @@ -370,12 +370,6 @@ mod test {
async, await
);

small_buffers!(
#[tokio::test]
read_event_into_async: tokio::io::BufReader<_>,
async, await
);

#[test]
fn test_future_is_send() {
// This test should just compile, no actual runtime checks are performed here.
Expand Down
57 changes: 1 addition & 56 deletions src/reader/buffered_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ impl Reader<BufReader<File>> {

#[cfg(test)]
mod test {
use crate::reader::test::{check, small_buffers};
use crate::reader::test::check;
use crate::reader::XmlSource;

/// Default buffer constructor that just passes through the byte array from the test
Expand All @@ -460,59 +460,4 @@ mod test {
identity,
&mut Vec::new()
);

small_buffers!(
#[test]
read_event_into: std::io::BufReader<_>
);

// Encoding-related tests; compiled only when the `encoding` feature is enabled.
#[cfg(feature = "encoding")]
mod encoding {
use crate::events::Event;
use crate::reader::Reader;
use encoding_rs::{UTF_16LE, UTF_8, WINDOWS_1251};
use pretty_assertions::assert_eq;

/// Checks that the encoding reported by the decoder changes after the XML
/// declaration is read.
///
/// The input starts with the bytes `FF FE` (a UTF-16LE BOM), but the XML
/// declaration names windows-1251.
#[test]
fn bom_detected() {
let mut reader =
Reader::from_reader(b"\xFF\xFE<?xml encoding='windows-1251'?>".as_ref());
let mut buf = Vec::new();

// Before any event is read the decoder reports UTF-8
assert_eq!(reader.decoder().encoding(), UTF_8);
assert!(matches!(
reader.read_event_into(&mut buf).unwrap(),
Event::Decl(_)
));
// Once the declaration has been read, the decoder reports windows-1251
assert_eq!(reader.decoder().encoding(), WINDOWS_1251);

assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof);
}

/// Checks that the encoding is changed by an XML declaration, but only once:
/// a second declaration in the same input does not change it again.
#[test]
fn xml_declaration() {
let mut reader = Reader::from_reader(
b"<?xml encoding='UTF-16'?><?xml encoding='windows-1251'?>".as_ref(),
);
let mut buf = Vec::new();

// Before any event is read the decoder reports UTF-8
assert_eq!(reader.decoder().encoding(), UTF_8);
assert!(matches!(
reader.read_event_into(&mut buf).unwrap(),
Event::Decl(_)
));
// The first declaration (`UTF-16`) switches the decoder to UTF-16LE
assert_eq!(reader.decoder().encoding(), UTF_16LE);

assert!(matches!(
reader.read_event_into(&mut buf).unwrap(),
Event::Decl(_)
));
// The second declaration (`windows-1251`) leaves the encoding unchanged
assert_eq!(reader.decoder().encoding(), UTF_16LE);

assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof);
}
}
}
149 changes: 0 additions & 149 deletions src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1826,157 +1826,8 @@ mod test {
};
}

/// Tests for https://github.com/tafia/quick-xml/issues/469
///
/// Generates a `small_buffers` module with tests that read a single markup
/// construct through a buffered reader whose capacity ends in the middle of
/// the closing delimiter of that construct (`?>`, `/>`, `]]>` or `-->`).
/// Each test expects the complete event followed by `Event::Eof`.
///
/// Parameters:
/// - `#[$test]`: attribute applied to every generated test function
/// - `$read_event`: name of the `Reader` method used to read events; it is
///   called with `&mut buf`
/// - `$BufReader`: buffered reader type; `with_capacity(size, input)` is
///   called on it
/// - `$async`, `$await` (optional): tokens inserted before `fn` and after the
///   read call respectively, for asynchronous readers
macro_rules! small_buffers {
(
#[$test:meta]
$read_event:ident: $BufReader:ty
$(, $async:ident, $await:ident)?
) => {
mod small_buffers {
use crate::events::{BytesCData, BytesDecl, BytesPI, BytesStart, BytesText, Event};
use crate::reader::Reader;
use pretty_assertions::assert_eq;

#[$test]
$($async)? fn decl() {
let xml = "<?xml ?>";
// ^^^^^^^ data that fit into buffer
// Capacity covers the input up to and including the `?` of the closing `?>`
let size = xml.match_indices("?>").next().unwrap().0 + 1;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}

#[$test]
$($async)? fn pi() {
let xml = "<?pi?>";
// ^^^^^ data that fit into buffer
// Capacity covers the input up to and including the `?` of the closing `?>`
let size = xml.match_indices("?>").next().unwrap().0 + 1;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::PI(BytesPI::new("pi"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}

#[$test]
$($async)? fn empty() {
let xml = "<empty/>";
// ^^^^^^^ data that fit into buffer
// Capacity covers the input up to and including the `/` of the closing `/>`
let size = xml.match_indices("/>").next().unwrap().0 + 1;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Empty(BytesStart::new("empty"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}

#[$test]
$($async)? fn cdata1() {
let xml = "<![CDATA[cdata]]>";
// ^^^^^^^^^^^^^^^ data that fit into buffer
// Capacity covers the input up to and including the first `]` of the closing `]]>`
let size = xml.match_indices("]]>").next().unwrap().0 + 1;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::CData(BytesCData::new("cdata"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}

#[$test]
$($async)? fn cdata2() {
let xml = "<![CDATA[cdata]]>";
// ^^^^^^^^^^^^^^^^ data that fit into buffer
// Capacity covers the input up to and including the second `]` of the closing `]]>`
let size = xml.match_indices("]]>").next().unwrap().0 + 2;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::CData(BytesCData::new("cdata"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}

#[$test]
$($async)? fn comment1() {
let xml = "<!--comment-->";
// ^^^^^^^^^^^^ data that fit into buffer
// Capacity covers the input up to and including the first `-` of the closing `-->`
let size = xml.match_indices("-->").next().unwrap().0 + 1;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Comment(BytesText::new("comment"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}

#[$test]
$($async)? fn comment2() {
let xml = "<!--comment-->";
// ^^^^^^^^^^^^^ data that fit into buffer
// Capacity covers the input up to and including the second `-` of the closing `-->`
let size = xml.match_indices("-->").next().unwrap().0 + 2;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Comment(BytesText::new("comment"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}
}
};
}

// Export macros for the child modules:
// - buffered_reader
// - slice_reader
pub(super) use check;
pub(super) use small_buffers;
}
21 changes: 0 additions & 21 deletions src/reader/slice_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -376,25 +376,4 @@ mod test {
identity,
()
);

// Encoding-related tests; compiled only when the `encoding` feature is enabled.
#[cfg(feature = "encoding")]
mod encoding {
use crate::events::Event;
use crate::reader::Reader;
use encoding_rs::UTF_8;
use pretty_assertions::assert_eq;

/// Checks that an XML declaration cannot change the encoding from UTF-8 if
/// the `Reader` was created using the `from_str` method
#[test]
fn str_always_has_utf8() {
let mut reader = Reader::from_str("<?xml encoding='UTF-16'?>");

// Before any event is read the decoder reports UTF-8
assert_eq!(reader.decoder().encoding(), UTF_8);
// Read the first event (the input consists only of the XML declaration)
reader.read_event().unwrap();
// The `UTF-16` named in the declaration did not change the encoding
assert_eq!(reader.decoder().encoding(), UTF_8);

assert_eq!(reader.read_event().unwrap(), Event::Eof);
}
}
}
23 changes: 21 additions & 2 deletions src/reader/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,20 @@ impl ReaderState {
BytesText::wrap(content, self.decoder())
}

/// reads `BytesElement` starting with a `!`,
/// return `Comment`, `CData` or `DocType` event
/// Returns `Comment`, `CData` or `DocType` event.
///
/// `buf` contains data between `<` and `>`:
/// - CDATA: `![CDATA[...]]`
/// - Comment: `!--...--`
/// - Doctype (uppercase): `!D...`
/// - Doctype (lowercase): `!d...`
pub fn emit_bang<'b>(&mut self, bang_type: BangType, buf: &'b [u8]) -> Result<Event<'b>> {
debug_assert_eq!(
buf.first(),
Some(&b'!'),
"CDATA, comment or DOCTYPE should start from '!'"
);

let uncased_starts_with = |string: &[u8], prefix: &[u8]| {
string.len() >= prefix.len() && string[..prefix.len()].eq_ignore_ascii_case(prefix)
};
Expand Down Expand Up @@ -153,7 +164,15 @@ impl ReaderState {

/// Wraps content of `buf` into the [`Event::End`] event. Checks that the end name
/// matches the last opened start name if `self.config.check_end_names` is set.
///
/// `buf` contains data between `<` and `>`, for example `/tag`.
pub fn emit_end<'b>(&mut self, buf: &'b [u8]) -> Result<Event<'b>> {
debug_assert_eq!(
buf.first(),
Some(&b'/'),
"closing tag should start from '/'"
);

// Strip the `/` character. `content` contains data between `</` and `>`
let content = &buf[1..];
// XML standard permits whitespaces after the markup name in closing tags.
Expand Down
Loading

0 comments on commit 649f3d8

Please sign in to comment.