diff --git a/src/de/mod.rs b/src/de/mod.rs index 1f1cd606..74461b8b 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -2165,8 +2165,9 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> { replace(&mut self.lookahead, self.reader.next()) } + /// Returns `true` when next event is not a text event in any form. #[inline(always)] - const fn need_trim_end(&self) -> bool { + const fn current_event_is_last_text(&self) -> bool { // If next event is a text or CDATA, we should not trim trailing spaces !matches!( self.lookahead, @@ -2182,43 +2183,27 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> { /// [`CData`]: PayloadEvent::CData fn drain_text(&mut self, mut result: Cow<'i, str>) -> Result, DeError> { loop { - match self.lookahead { - Ok(PayloadEvent::Text(_) | PayloadEvent::CData(_)) => { - let text = self.next_text()?; - - let mut s = result.into_owned(); - s += &text; - result = Cow::Owned(s); - } - _ => break, + if self.current_event_is_last_text() { + break; } - } - Ok(DeEvent::Text(Text { text: result })) - } - /// Read one text event, panics if current event is not a text event - /// - /// |Event |XML |Handling - /// |-----------------------|---------------------------|---------------------------------------- - /// |[`PayloadEvent::Start`]|`...` |Possible panic (unreachable) - /// |[`PayloadEvent::End`] |`` |Possible panic (unreachable) - /// |[`PayloadEvent::Text`] |`text content` |Unescapes `text content` and returns it - /// |[`PayloadEvent::CData`]|``|Returns `cdata content` unchanged - /// |[`PayloadEvent::Eof`] | |Possible panic (unreachable) - #[inline(always)] - fn next_text(&mut self) -> Result, DeError> { - match self.next_impl()? { - PayloadEvent::Text(mut e) => { - if self.need_trim_end() { - e.inplace_trim_end(); + match self.next_impl()? { + PayloadEvent::Text(mut e) => { + if self.current_event_is_last_text() { + // FIXME: Actually, we should trim after decoding text, but now we trim before + e.inplace_trim_end(); + } + result + .to_mut() + .push_str(&e.unescape_with(|entity| self.entity_resolver.resolve(entity))?); } - Ok(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?) - } - PayloadEvent::CData(e) => Ok(e.decode()?), + PayloadEvent::CData(e) => result.to_mut().push_str(&e.decode()?), - // SAFETY: this method is called only when we peeked Text or CData - _ => unreachable!("Only `Text` and `CData` events can come here"), + // SAFETY: current_event_is_last_text checks that event is Text or CData + _ => unreachable!("Only `Text` and `CData` events can come here"), + } } + Ok(DeEvent::Text(Text { text: result })) } /// Return an input-borrowing event. @@ -2228,7 +2213,8 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> { PayloadEvent::Start(e) => Ok(DeEvent::Start(e)), PayloadEvent::End(e) => Ok(DeEvent::End(e)), PayloadEvent::Text(mut e) => { - if self.need_trim_end() && e.inplace_trim_end() { + if self.current_event_is_last_text() && e.inplace_trim_end() { + // FIXME: Actually, we should trim after decoding text, but now we trim before continue; } self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?) diff --git a/src/reader/async_tokio.rs b/src/reader/async_tokio.rs index ef3ecc1f..e1f28629 100644 --- a/src/reader/async_tokio.rs +++ b/src/reader/async_tokio.rs @@ -359,7 +359,7 @@ impl NsReader { #[cfg(test)] mod test { use super::TokioAdapter; - use crate::reader::test::{check, small_buffers}; + use crate::reader::test::check; check!( #[tokio::test] @@ -370,12 +370,6 @@ mod test { async, await ); - small_buffers!( - #[tokio::test] - read_event_into_async: tokio::io::BufReader<_>, - async, await - ); - #[test] fn test_future_is_send() { // This test should just compile, no actual runtime checks are performed here. diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index 1658d925..e515826a 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -445,7 +445,7 @@ impl Reader> { #[cfg(test)] mod test { - use crate::reader::test::{check, small_buffers}; + use crate::reader::test::check; use crate::reader::XmlSource; /// Default buffer constructor just pass the byte array from the test @@ -460,59 +460,4 @@ mod test { identity, &mut Vec::new() ); - - small_buffers!( - #[test] - read_event_into: std::io::BufReader<_> - ); - - #[cfg(feature = "encoding")] - mod encoding { - use crate::events::Event; - use crate::reader::Reader; - use encoding_rs::{UTF_16LE, UTF_8, WINDOWS_1251}; - use pretty_assertions::assert_eq; - - /// Checks that encoding is detected by BOM and changed after XML declaration - /// BOM indicates UTF-16LE, but XML - windows-1251 - #[test] - fn bom_detected() { - let mut reader = - Reader::from_reader(b"\xFF\xFE".as_ref()); - let mut buf = Vec::new(); - - assert_eq!(reader.decoder().encoding(), UTF_8); - assert!(matches!( - reader.read_event_into(&mut buf).unwrap(), - Event::Decl(_) - )); - assert_eq!(reader.decoder().encoding(), WINDOWS_1251); - - assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); - } - - /// Checks that encoding is changed by XML declaration, but only once - #[test] - fn xml_declaration() { - let mut reader = Reader::from_reader( - b"".as_ref(), - ); - let mut buf = Vec::new(); - - assert_eq!(reader.decoder().encoding(), UTF_8); - assert!(matches!( - reader.read_event_into(&mut buf).unwrap(), - Event::Decl(_) - )); - assert_eq!(reader.decoder().encoding(), UTF_16LE); - - assert!(matches!( - reader.read_event_into(&mut buf).unwrap(), - Event::Decl(_) - )); - assert_eq!(reader.decoder().encoding(), UTF_16LE); - - assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); - } - } } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 2511dcbe..bfef4d9b 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -1826,157 +1826,8 @@ mod test { }; } - /// Tests for https://github.com/tafia/quick-xml/issues/469 - macro_rules! small_buffers { - ( - #[$test:meta] - $read_event:ident: $BufReader:ty - $(, $async:ident, $await:ident)? - ) => { - mod small_buffers { - use crate::events::{BytesCData, BytesDecl, BytesPI, BytesStart, BytesText, Event}; - use crate::reader::Reader; - use pretty_assertions::assert_eq; - - #[$test] - $($async)? fn decl() { - let xml = ""; - // ^^^^^^^ data that fit into buffer - let size = xml.match_indices("?>").next().unwrap().0 + 1; - let br = <$BufReader>::with_capacity(size, xml.as_bytes()); - let mut reader = Reader::from_reader(br); - let mut buf = Vec::new(); - - assert_eq!( - reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3))) - ); - assert_eq!( - reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::Eof - ); - } - - #[$test] - $($async)? fn pi() { - let xml = ""; - // ^^^^^ data that fit into buffer - let size = xml.match_indices("?>").next().unwrap().0 + 1; - let br = <$BufReader>::with_capacity(size, xml.as_bytes()); - let mut reader = Reader::from_reader(br); - let mut buf = Vec::new(); - - assert_eq!( - reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::PI(BytesPI::new("pi")) - ); - assert_eq!( - reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::Eof - ); - } - - #[$test] - $($async)? fn empty() { - let xml = ""; - // ^^^^^^^ data that fit into buffer - let size = xml.match_indices("/>").next().unwrap().0 + 1; - let br = <$BufReader>::with_capacity(size, xml.as_bytes()); - let mut reader = Reader::from_reader(br); - let mut buf = Vec::new(); - - assert_eq!( - reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::Empty(BytesStart::new("empty")) - ); - assert_eq!( - reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::Eof - ); - } - - #[$test] - $($async)? fn cdata1() { - let xml = ""; - // ^^^^^^^^^^^^^^^ data that fit into buffer - let size = xml.match_indices("]]>").next().unwrap().0 + 1; - let br = <$BufReader>::with_capacity(size, xml.as_bytes()); - let mut reader = Reader::from_reader(br); - let mut buf = Vec::new(); - - assert_eq!( - reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::CData(BytesCData::new("cdata")) - ); - assert_eq!( - reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::Eof - ); - } - - #[$test] - $($async)? fn cdata2() { - let xml = ""; - // ^^^^^^^^^^^^^^^^ data that fit into buffer - let size = xml.match_indices("]]>").next().unwrap().0 + 2; - let br = <$BufReader>::with_capacity(size, xml.as_bytes()); - let mut reader = Reader::from_reader(br); - let mut buf = Vec::new(); - - assert_eq!( - reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::CData(BytesCData::new("cdata")) - ); - assert_eq!( - reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::Eof - ); - } - - #[$test] - $($async)? fn comment1() { - let xml = ""; - // ^^^^^^^^^^^^ data that fit into buffer - let size = xml.match_indices("-->").next().unwrap().0 + 1; - let br = <$BufReader>::with_capacity(size, xml.as_bytes()); - let mut reader = Reader::from_reader(br); - let mut buf = Vec::new(); - - assert_eq!( - reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::Comment(BytesText::new("comment")) - ); - assert_eq!( - reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::Eof - ); - } - - #[$test] - $($async)? fn comment2() { - let xml = ""; - // ^^^^^^^^^^^^^ data that fit into buffer - let size = xml.match_indices("-->").next().unwrap().0 + 2; - let br = <$BufReader>::with_capacity(size, xml.as_bytes()); - let mut reader = Reader::from_reader(br); - let mut buf = Vec::new(); - - assert_eq!( - reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::Comment(BytesText::new("comment")) - ); - assert_eq!( - reader.$read_event(&mut buf) $(.$await)? .unwrap(), - Event::Eof - ); - } - } - }; - } - // Export macros for the child modules: // - buffered_reader // - slice_reader pub(super) use check; - pub(super) use small_buffers; } diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index ed00f867..89a9dd96 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -376,25 +376,4 @@ mod test { identity, () ); - - #[cfg(feature = "encoding")] - mod encoding { - use crate::events::Event; - use crate::reader::Reader; - use encoding_rs::UTF_8; - use pretty_assertions::assert_eq; - - /// Checks that XML declaration cannot change the encoding from UTF-8 if - /// a `Reader` was created using `from_str` method - #[test] - fn str_always_has_utf8() { - let mut reader = Reader::from_str(""); - - assert_eq!(reader.decoder().encoding(), UTF_8); - reader.read_event().unwrap(); - assert_eq!(reader.decoder().encoding(), UTF_8); - - assert_eq!(reader.read_event().unwrap(), Event::Eof); - } - } } diff --git a/src/reader/state.rs b/src/reader/state.rs index b2ab1065..128a21a5 100644 --- a/src/reader/state.rs +++ b/src/reader/state.rs @@ -71,9 +71,20 @@ impl ReaderState { BytesText::wrap(content, self.decoder()) } - /// reads `BytesElement` starting with a `!`, - /// return `Comment`, `CData` or `DocType` event + /// Returns `Comment`, `CData` or `DocType` event. + /// + /// `buf` contains data between `<` and `>`: + /// - CDATA: `![CDATA[...]]` + /// - Comment: `!--...--` + /// - Doctype (uppercase): `!D...` + /// - Doctype (lowercase): `!d...` pub fn emit_bang<'b>(&mut self, bang_type: BangType, buf: &'b [u8]) -> Result> { + debug_assert_eq!( + buf.first(), + Some(&b'!'), + "CDATA, comment or DOCTYPE should start from '!'" + ); + let uncased_starts_with = |string: &[u8], prefix: &[u8]| { string.len() >= prefix.len() && string[..prefix.len()].eq_ignore_ascii_case(prefix) }; @@ -153,7 +164,15 @@ impl ReaderState { /// Wraps content of `buf` into the [`Event::End`] event. Does the check that /// end name matches the last opened start name if `self.config.check_end_names` is set. + /// + /// `buf` contains data between `<` and `>`, for example `/tag`. pub fn emit_end<'b>(&mut self, buf: &'b [u8]) -> Result> { + debug_assert_eq!( + buf.first(), + Some(&b'/'), + "closing tag should start from '/'" + ); + // Strip the `/` character. `content` contains data between `` let content = &buf[1..]; // XML standard permits whitespaces after the markup name in closing tags. diff --git a/src/writer.rs b/src/writer.rs index 74a70317..7dec0ccf 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -647,558 +647,3 @@ impl Indentation { } } } - -#[cfg(test)] -mod indentation { - use super::*; - use crate::events::*; - use pretty_assertions::assert_eq; - - #[test] - fn self_closed() { - let mut buffer = Vec::new(); - let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); - - let tag = BytesStart::new("self-closed") - .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); - writer - .write_event(Event::Empty(tag)) - .expect("write tag failed"); - - assert_eq!( - std::str::from_utf8(&buffer).unwrap(), - r#""# - ); - } - - #[test] - fn empty_paired() { - let mut buffer = Vec::new(); - let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); - - let start = BytesStart::new("paired") - .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); - let end = start.to_end(); - writer - .write_event(Event::Start(start.clone())) - .expect("write start tag failed"); - writer - .write_event(Event::End(end)) - .expect("write end tag failed"); - - assert_eq!( - std::str::from_utf8(&buffer).unwrap(), - r#" -"# - ); - } - - #[test] - fn paired_with_inner() { - let mut buffer = Vec::new(); - let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); - - let start = BytesStart::new("paired") - .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); - let end = start.to_end(); - let inner = BytesStart::new("inner"); - - writer - .write_event(Event::Start(start.clone())) - .expect("write start tag failed"); - writer - .write_event(Event::Empty(inner)) - .expect("write inner tag failed"); - writer - .write_event(Event::End(end)) - .expect("write end tag failed"); - - assert_eq!( - std::str::from_utf8(&buffer).unwrap(), - r#" - -"# - ); - } - - #[test] - fn paired_with_text() { - let mut buffer = Vec::new(); - let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); - - let start = BytesStart::new("paired") - .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); - let end = start.to_end(); - let text = BytesText::new("text"); - - writer - .write_event(Event::Start(start.clone())) - .expect("write start tag failed"); - writer - .write_event(Event::Text(text)) - .expect("write text failed"); - writer - .write_event(Event::End(end)) - .expect("write end tag failed"); - - assert_eq!( - std::str::from_utf8(&buffer).unwrap(), - r#"text"# - ); - } - - #[test] - fn mixed_content() { - let mut buffer = Vec::new(); - let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); - - let start = BytesStart::new("paired") - .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); - let end = start.to_end(); - let text = BytesText::new("text"); - let inner = BytesStart::new("inner"); - - writer - .write_event(Event::Start(start.clone())) - .expect("write start tag failed"); - writer - .write_event(Event::Text(text)) - .expect("write text failed"); - writer - .write_event(Event::Empty(inner)) - .expect("write inner tag failed"); - writer - .write_event(Event::End(end)) - .expect("write end tag failed"); - - assert_eq!( - std::str::from_utf8(&buffer).unwrap(), - r#"text -"# - ); - } - - #[test] - fn nested() { - let mut buffer = Vec::new(); - let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); - - let start = BytesStart::new("paired") - .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); - let end = start.to_end(); - let inner = BytesStart::new("inner"); - - writer - .write_event(Event::Start(start.clone())) - .expect("write start 1 tag failed"); - writer - .write_event(Event::Start(start.clone())) - .expect("write start 2 tag failed"); - writer - .write_event(Event::Empty(inner)) - .expect("write inner tag failed"); - writer - .write_event(Event::End(end.clone())) - .expect("write end tag 2 failed"); - writer - .write_event(Event::End(end)) - .expect("write end tag 1 failed"); - - assert_eq!( - std::str::from_utf8(&buffer).unwrap(), - r#" - - - -"# - ); - } - - #[cfg(feature = "serialize")] - #[test] - fn serializable() { - #[derive(Serialize)] - struct Foo { - #[serde(rename = "@attribute")] - attribute: &'static str, - - element: Bar, - list: Vec<&'static str>, - - #[serde(rename = "$text")] - text: &'static str, - - val: String, - } - - #[derive(Serialize)] - struct Bar { - baz: usize, - bat: usize, - } - - let mut buffer = Vec::new(); - let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); - - let content = Foo { - attribute: "attribute", - element: Bar { baz: 42, bat: 43 }, - list: vec!["first element", "second element"], - text: "text", - val: "foo".to_owned(), - }; - - let start = BytesStart::new("paired") - .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); - let end = start.to_end(); - - writer - .write_event(Event::Start(start.clone())) - .expect("write start tag failed"); - writer - .write_serializable("foo_element", &content) - .expect("write serializable inner contents failed"); - writer - .write_event(Event::End(end)) - .expect("write end tag failed"); - - assert_eq!( - std::str::from_utf8(&buffer).unwrap(), - r#" - - - 42 - 43 - - first element - second element - text - foo - -"# - ); - } - - #[test] - fn element_writer_empty() { - let mut buffer = Vec::new(); - let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); - - writer - .create_element("empty") - .with_attribute(("attr1", "value1")) - .with_attribute(("attr2", "value2")) - .write_empty() - .expect("failure"); - - assert_eq!( - std::str::from_utf8(&buffer).unwrap(), - r#""# - ); - } - - #[test] - fn element_writer_text() { - let mut buffer = Vec::new(); - let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); - - writer - .create_element("paired") - .with_attribute(("attr1", "value1")) - .with_attribute(("attr2", "value2")) - .write_text_content(BytesText::new("text")) - .expect("failure"); - - assert_eq!( - std::str::from_utf8(&buffer).unwrap(), - r#"text"# - ); - } - - #[test] - fn element_writer_nested() { - let mut buffer = Vec::new(); - let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); - - writer - .create_element("outer") - .with_attribute(("attr1", "value1")) - .with_attribute(("attr2", "value2")) - .write_inner_content::<_, Error>(|writer| { - let fruits = ["apple", "orange", "banana"]; - for (quant, item) in fruits.iter().enumerate() { - writer - .create_element("fruit") - .with_attribute(("quantity", quant.to_string().as_str())) - .write_text_content(BytesText::new(item))?; - } - writer - .create_element("inner") - .write_inner_content(|writer| { - writer.create_element("empty").write_empty().map(|_| ()) - })?; - - Ok(()) - }) - .expect("failure"); - - assert_eq!( - std::str::from_utf8(&buffer).unwrap(), - r#" - apple - orange - banana - - - -"# - ); - } - - mod in_attributes { - use super::*; - use pretty_assertions::assert_eq; - - #[test] - fn newline_first() { - let mut buffer = Vec::new(); - let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); - - writer - .create_element("element") - .new_line() - .with_attribute(("first", "1")) - .with_attribute(("second", "2")) - .new_line() - .with_attribute(("third", "3")) - .with_attribute(("fourth", "4")) - .write_empty() - .expect("write tag failed"); - - assert_eq!( - std::str::from_utf8(&buffer).unwrap(), - "" - ); - } - - #[test] - fn newline_inside() { - let mut buffer = Vec::new(); - let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); - - writer - .create_element("element") - .with_attribute(("first", "1")) - .with_attribute(("second", "2")) - .new_line() - .with_attribute(("third", "3")) - .with_attribute(("fourth", "4")) - .write_empty() - .expect("write tag failed"); - - assert_eq!( - std::str::from_utf8(&buffer).unwrap(), - "" - ); - } - - #[test] - fn newline_last() { - let mut buffer = Vec::new(); - let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); - - writer - .create_element("element") - .new_line() - .with_attribute(("first", "1")) - .with_attribute(("second", "2")) - .new_line() - .with_attribute(("third", "3")) - .with_attribute(("fourth", "4")) - .new_line() - .write_empty() - .expect("write tag failed"); - - writer - .create_element("element") - .with_attribute(("first", "1")) - .with_attribute(("second", "2")) - .new_line() - .with_attribute(("third", "3")) - .with_attribute(("fourth", "4")) - .new_line() - .write_empty() - .expect("write tag failed"); - - assert_eq!( - std::str::from_utf8(&buffer).unwrap(), - "\ - \n" - ); - } - - #[test] - fn newline_twice() { - let mut buffer = Vec::new(); - let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); - - writer - .create_element("element") - .new_line() - .new_line() - .write_empty() - .expect("write tag failed"); - - writer - .create_element("element") - .with_attribute(("first", "1")) - .new_line() - .new_line() - .with_attribute(("second", "2")) - .write_empty() - .expect("write tag failed"); - - assert_eq!( - std::str::from_utf8(&buffer).unwrap(), - r#" -"# - ); - } - - #[test] - fn without_indent() { - let mut buffer = Vec::new(); - let mut writer = Writer::new(&mut buffer); - - writer - .create_element("element") - .new_line() - .new_line() - .write_empty() - .expect("write tag failed"); - - writer - .create_element("element") - .with_attribute(("first", "1")) - .new_line() - .new_line() - .with_attribute(("second", "2")) - .write_empty() - .expect("write tag failed"); - - assert_eq!( - std::str::from_utf8(&buffer).unwrap(), - r#""# - ); - } - - #[test] - fn long_element_name() { - let mut buffer = Vec::new(); - let mut writer = Writer::new_with_indent(&mut buffer, b't', 1); - - writer - .create_element(String::from("x").repeat(128).as_str()) - .with_attribute(("first", "1")) - .new_line() - .with_attribute(("second", "2")) - .write_empty() - .expect("Problem with indentation reference"); - } - } - - mod in_attributes_multi { - use super::*; - use pretty_assertions::assert_eq; - - #[test] - fn newline_first() { - let mut buffer = Vec::new(); - let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); - - writer - .create_element("element") - .new_line() - .with_attributes([("first", "1"), ("second", "2")]) - .new_line() - .with_attributes([("third", "3"), ("fourth", "4")]) - .write_empty() - .expect("write tag failed"); - - assert_eq!( - std::str::from_utf8(&buffer).unwrap(), - "" - ); - } - - #[test] - fn newline_inside() { - let mut buffer = Vec::new(); - let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); - - writer - .create_element("element") - .with_attributes([("first", "1"), ("second", "2")]) - .new_line() - .with_attributes([("third", "3"), ("fourth", "4")]) - .write_empty() - .expect("write tag failed"); - - assert_eq!( - std::str::from_utf8(&buffer).unwrap(), - r#""# - ); - } - - #[test] - fn newline_last() { - let mut buffer = Vec::new(); - let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); - - writer - .create_element("element") - .new_line() - .with_attributes([("first", "1"), ("second", "2")]) - .new_line() - .with_attributes([("third", "3"), ("fourth", "4")]) - .new_line() - .write_empty() - .expect("write tag failed"); - - writer - .create_element("element") - .with_attributes([("first", "1"), ("second", "2")]) - .new_line() - .with_attributes([("third", "3"), ("fourth", "4")]) - .new_line() - .write_empty() - .expect("write tag failed"); - - assert_eq!( - std::str::from_utf8(&buffer).unwrap(), - "\ - \n" - ); - } - } -} diff --git a/tests/async-tokio.rs b/tests/async-tokio.rs index 12a21f95..9323fd93 100644 --- a/tests/async-tokio.rs +++ b/tests/async-tokio.rs @@ -2,6 +2,16 @@ use pretty_assertions::assert_eq; use quick_xml::events::Event::*; use quick_xml::reader::Reader; +// Import `small_buffers_tests!` +#[macro_use] +mod reader; + +small_buffers_tests!( + #[tokio::test] + read_event_into_async: tokio::io::BufReader<_>, + async, await +); + #[tokio::test] async fn test_sample() { let src = include_str!("documents/sample_rss.xml"); diff --git a/tests/encodings.rs b/tests/encodings.rs index 92ea1715..5f5676fa 100644 --- a/tests/encodings.rs +++ b/tests/encodings.rs @@ -1,10 +1,12 @@ +use encoding_rs::{UTF_16BE, UTF_16LE, UTF_8, WINDOWS_1251}; +use pretty_assertions::assert_eq; use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event::*}; -use quick_xml::Reader; +use quick_xml::reader::Reader; mod decode { - use encoding_rs::{UTF_16BE, UTF_16LE, UTF_8}; + use super::*; use pretty_assertions::assert_eq; - use quick_xml::encoding::*; + use quick_xml::encoding::detect_encoding; static UTF16BE_TEXT_WITH_BOM: &[u8] = include_bytes!("documents/encoding/utf16be-bom.xml"); static UTF16LE_TEXT_WITH_BOM: &[u8] = include_bytes!("documents/encoding/utf16le-bom.xml"); @@ -225,3 +227,47 @@ fn bom_removed_from_initial_text() { assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("paired"))); assert_eq!(r.read_event().unwrap(), Eof); } + +/// Checks that encoding is detected by BOM and changed after XML declaration +/// BOM indicates UTF-16LE, but XML - windows-1251 +#[test] +fn bom_overridden_by_declaration() { + let mut reader = Reader::from_reader(b"\xFF\xFE".as_ref()); + let mut buf = Vec::new(); + + assert_eq!(reader.decoder().encoding(), UTF_8); + assert!(matches!(reader.read_event_into(&mut buf).unwrap(), Decl(_))); + assert_eq!(reader.decoder().encoding(), WINDOWS_1251); + + assert_eq!(reader.read_event_into(&mut buf).unwrap(), Eof); +} + +/// Checks that encoding is changed by XML declaration, but only once +#[test] +fn only_one_declaration_changes_encoding() { + let mut reader = + Reader::from_reader(b"".as_ref()); + let mut buf = Vec::new(); + + assert_eq!(reader.decoder().encoding(), UTF_8); + assert!(matches!(reader.read_event_into(&mut buf).unwrap(), Decl(_))); + assert_eq!(reader.decoder().encoding(), UTF_16LE); + + assert!(matches!(reader.read_event_into(&mut buf).unwrap(), Decl(_))); + assert_eq!(reader.decoder().encoding(), UTF_16LE); + + assert_eq!(reader.read_event_into(&mut buf).unwrap(), Eof); +} + +/// Checks that XML declaration cannot change the encoding from UTF-8 if +/// a `Reader` was created using `from_str` method +#[test] +fn str_always_has_utf8() { + let mut reader = Reader::from_str(""); + + assert_eq!(reader.decoder().encoding(), UTF_8); + reader.read_event().unwrap(); + assert_eq!(reader.decoder().encoding(), UTF_8); + + assert_eq!(reader.read_event().unwrap(), Eof); +} diff --git a/tests/reader.rs b/tests/reader.rs index ae864953..f1e58de7 100644 --- a/tests/reader.rs +++ b/tests/reader.rs @@ -5,6 +5,161 @@ use quick_xml::reader::Reader; use pretty_assertions::assert_eq; +/// Tests for https://github.com/tafia/quick-xml/issues/469 +/// Exported to reuse in `async-tokio` tests. +#[macro_export] +macro_rules! small_buffers_tests { + ( + #[$test:meta] + $read_event:ident: $BufReader:ty + $(, $async:ident, $await:ident)? + ) => { + mod small_buffers { + use quick_xml::events::{BytesCData, BytesDecl, BytesPI, BytesStart, BytesText, Event}; + use quick_xml::reader::Reader; + use pretty_assertions::assert_eq; + + #[$test] + $($async)? fn decl() { + let xml = ""; + // ^^^^^^^ data that fit into buffer + let size = xml.match_indices("?>").next().unwrap().0 + 1; + let br = <$BufReader>::with_capacity(size, xml.as_bytes()); + let mut reader = Reader::from_reader(br); + let mut buf = Vec::new(); + + assert_eq!( + reader.$read_event(&mut buf) $(.$await)? .unwrap(), + Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3))) + ); + assert_eq!( + reader.$read_event(&mut buf) $(.$await)? .unwrap(), + Event::Eof + ); + } + + #[$test] + $($async)? fn pi() { + let xml = ""; + // ^^^^^ data that fit into buffer + let size = xml.match_indices("?>").next().unwrap().0 + 1; + let br = <$BufReader>::with_capacity(size, xml.as_bytes()); + let mut reader = Reader::from_reader(br); + let mut buf = Vec::new(); + + assert_eq!( + reader.$read_event(&mut buf) $(.$await)? .unwrap(), + Event::PI(BytesPI::new("pi")) + ); + assert_eq!( + reader.$read_event(&mut buf) $(.$await)? .unwrap(), + Event::Eof + ); + } + + #[$test] + $($async)? fn empty() { + let xml = ""; + // ^^^^^^^ data that fit into buffer + let size = xml.match_indices("/>").next().unwrap().0 + 1; + let br = <$BufReader>::with_capacity(size, xml.as_bytes()); + let mut reader = Reader::from_reader(br); + let mut buf = Vec::new(); + + assert_eq!( + reader.$read_event(&mut buf) $(.$await)? .unwrap(), + Event::Empty(BytesStart::new("empty")) + ); + assert_eq!( + reader.$read_event(&mut buf) $(.$await)? .unwrap(), + Event::Eof + ); + } + + #[$test] + $($async)? fn cdata1() { + let xml = ""; + // ^^^^^^^^^^^^^^^ data that fit into buffer + let size = xml.match_indices("]]>").next().unwrap().0 + 1; + let br = <$BufReader>::with_capacity(size, xml.as_bytes()); + let mut reader = Reader::from_reader(br); + let mut buf = Vec::new(); + + assert_eq!( + reader.$read_event(&mut buf) $(.$await)? .unwrap(), + Event::CData(BytesCData::new("cdata")) + ); + assert_eq!( + reader.$read_event(&mut buf) $(.$await)? .unwrap(), + Event::Eof + ); + } + + #[$test] + $($async)? fn cdata2() { + let xml = ""; + // ^^^^^^^^^^^^^^^^ data that fit into buffer + let size = xml.match_indices("]]>").next().unwrap().0 + 2; + let br = <$BufReader>::with_capacity(size, xml.as_bytes()); + let mut reader = Reader::from_reader(br); + let mut buf = Vec::new(); + + assert_eq!( + reader.$read_event(&mut buf) $(.$await)? .unwrap(), + Event::CData(BytesCData::new("cdata")) + ); + assert_eq!( + reader.$read_event(&mut buf) $(.$await)? .unwrap(), + Event::Eof + ); + } + + #[$test] + $($async)? fn comment1() { + let xml = ""; + // ^^^^^^^^^^^^ data that fit into buffer + let size = xml.match_indices("-->").next().unwrap().0 + 1; + let br = <$BufReader>::with_capacity(size, xml.as_bytes()); + let mut reader = Reader::from_reader(br); + let mut buf = Vec::new(); + + assert_eq!( + reader.$read_event(&mut buf) $(.$await)? .unwrap(), + Event::Comment(BytesText::new("comment")) + ); + assert_eq!( + reader.$read_event(&mut buf) $(.$await)? .unwrap(), + Event::Eof + ); + } + + #[$test] + $($async)? fn comment2() { + let xml = ""; + // ^^^^^^^^^^^^^ data that fit into buffer + let size = xml.match_indices("-->").next().unwrap().0 + 2; + let br = <$BufReader>::with_capacity(size, xml.as_bytes()); + let mut reader = Reader::from_reader(br); + let mut buf = Vec::new(); + + assert_eq!( + reader.$read_event(&mut buf) $(.$await)? .unwrap(), + Event::Comment(BytesText::new("comment")) + ); + assert_eq!( + reader.$read_event(&mut buf) $(.$await)? .unwrap(), + Event::Eof + ); + } + } + }; +} + +small_buffers_tests!( + #[test] + read_event_into: std::io::BufReader<_> +); + #[test] fn test_start_end() { let mut r = Reader::from_str(""); diff --git a/tests/writer-indentation.rs b/tests/writer-indentation.rs new file mode 100644 index 00000000..31240fd3 --- /dev/null +++ b/tests/writer-indentation.rs @@ -0,0 +1,555 @@ +use quick_xml::errors::Error; +use quick_xml::events::{BytesStart, BytesText, Event}; +use quick_xml::writer::Writer; + +use pretty_assertions::assert_eq; + +#[test] +fn self_closed() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); + + let tag = BytesStart::new("self-closed") + .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); + writer + .write_event(Event::Empty(tag)) + .expect("write tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + r#""# + ); +} + +#[test] +fn empty_paired() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); + + let start = BytesStart::new("paired") + .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); + let end = start.to_end(); + writer + .write_event(Event::Start(start.clone())) + .expect("write start tag failed"); + writer + .write_event(Event::End(end)) + .expect("write end tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + r#" +"# + ); +} + +#[test] +fn paired_with_inner() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); + + let start = BytesStart::new("paired") + .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); + let end = start.to_end(); + let inner = BytesStart::new("inner"); + + writer + .write_event(Event::Start(start.clone())) + .expect("write start tag failed"); + writer + .write_event(Event::Empty(inner)) + .expect("write inner tag failed"); + writer + .write_event(Event::End(end)) + .expect("write end tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + r#" + +"# + ); +} + +#[test] +fn paired_with_text() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); + + let start = BytesStart::new("paired") + .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); + let end = start.to_end(); + let text = BytesText::new("text"); + + writer + .write_event(Event::Start(start.clone())) + .expect("write start tag failed"); + writer + .write_event(Event::Text(text)) + .expect("write text failed"); + writer + .write_event(Event::End(end)) + .expect("write end tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + r#"text"# + ); +} + +#[test] +fn mixed_content() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); + + let start = BytesStart::new("paired") + .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); + let end = start.to_end(); + let text = BytesText::new("text"); + let inner = BytesStart::new("inner"); + + writer + .write_event(Event::Start(start.clone())) + .expect("write start tag failed"); + writer + .write_event(Event::Text(text)) + .expect("write text failed"); + writer + .write_event(Event::Empty(inner)) + .expect("write inner tag failed"); + writer + .write_event(Event::End(end)) + .expect("write end tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + r#"text +"# + ); +} + +#[test] +fn nested() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); + + let start = BytesStart::new("paired") + .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); + let end = start.to_end(); + let inner = BytesStart::new("inner"); + + writer + .write_event(Event::Start(start.clone())) + .expect("write start 1 tag failed"); + writer + .write_event(Event::Start(start.clone())) + .expect("write start 2 tag failed"); + writer + .write_event(Event::Empty(inner)) + .expect("write inner tag failed"); + writer + .write_event(Event::End(end.clone())) + .expect("write end tag 2 failed"); + writer + .write_event(Event::End(end)) + .expect("write end tag 1 failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + r#" + + + +"# + ); +} + +#[cfg(feature = "serialize")] +#[test] +fn serializable() { + use serde::Serialize; + + #[derive(Serialize)] + struct Foo { + #[serde(rename = "@attribute")] + attribute: &'static str, + + element: Bar, + list: Vec<&'static str>, + + #[serde(rename = "$text")] + text: &'static str, + + val: String, + } + + #[derive(Serialize)] + struct Bar { + baz: usize, + bat: usize, + } + + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); + + let content = Foo { + attribute: "attribute", + element: Bar { baz: 42, bat: 43 }, + list: vec!["first element", "second element"], + text: "text", + val: "foo".to_owned(), + }; + + let start = BytesStart::new("paired") + .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); + let end = start.to_end(); + + writer + .write_event(Event::Start(start.clone())) + .expect("write start tag failed"); + writer + .write_serializable("foo_element", &content) + .expect("write serializable inner contents failed"); + writer + .write_event(Event::End(end)) + .expect("write end tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + r#" + + + 42 + 43 + + first element + second element + text + foo + +"# + ); +} + +#[test] +fn element_writer_empty() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); + + writer + .create_element("empty") + .with_attribute(("attr1", "value1")) + .with_attribute(("attr2", "value2")) + .write_empty() + .expect("failure"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + r#""# + ); +} + +#[test] +fn element_writer_text() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); + + writer + .create_element("paired") + .with_attribute(("attr1", "value1")) + .with_attribute(("attr2", "value2")) + .write_text_content(BytesText::new("text")) + .expect("failure"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + r#"text"# + ); +} + +#[test] +fn element_writer_nested() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); + + writer + .create_element("outer") + .with_attribute(("attr1", "value1")) + .with_attribute(("attr2", "value2")) + .write_inner_content::<_, Error>(|writer| { + let fruits = ["apple", "orange", "banana"]; + for (quant, item) in fruits.iter().enumerate() { + writer + .create_element("fruit") + .with_attribute(("quantity", quant.to_string().as_str())) + .write_text_content(BytesText::new(item))?; + } + writer + .create_element("inner") + .write_inner_content(|writer| { + writer.create_element("empty").write_empty().map(|_| ()) + })?; + + Ok(()) + }) + .expect("failure"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + r#" + apple + orange + banana + + + +"# + ); +} + +mod in_attributes { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn newline_first() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); + + writer + .create_element("element") + .new_line() + .with_attribute(("first", "1")) + .with_attribute(("second", "2")) + .new_line() + .with_attribute(("third", "3")) + .with_attribute(("fourth", "4")) + .write_empty() + .expect("write tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + "" + ); + } + + #[test] + fn newline_inside() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); + + writer + .create_element("element") + .with_attribute(("first", "1")) + .with_attribute(("second", "2")) + .new_line() + .with_attribute(("third", "3")) + .with_attribute(("fourth", "4")) + .write_empty() + .expect("write tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + "" + ); + } + + #[test] + fn newline_last() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); + + writer + .create_element("element") + .new_line() + .with_attribute(("first", "1")) + .with_attribute(("second", "2")) + .new_line() + .with_attribute(("third", "3")) + .with_attribute(("fourth", "4")) + .new_line() + .write_empty() + .expect("write tag failed"); + + writer + .create_element("element") + .with_attribute(("first", "1")) + .with_attribute(("second", "2")) + .new_line() + .with_attribute(("third", "3")) + .with_attribute(("fourth", "4")) + .new_line() + .write_empty() + .expect("write tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + "\ + \n" + ); + } + + #[test] + fn newline_twice() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); + + writer + .create_element("element") + .new_line() + .new_line() + .write_empty() + .expect("write tag failed"); + + writer + .create_element("element") + .with_attribute(("first", "1")) + .new_line() + .new_line() + .with_attribute(("second", "2")) + .write_empty() + .expect("write tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + r#" +"# + ); + } + + #[test] + fn without_indent() { + let mut buffer = Vec::new(); + let mut writer = Writer::new(&mut buffer); + + writer + .create_element("element") + .new_line() + .new_line() + .write_empty() + .expect("write tag failed"); + + writer + .create_element("element") + .with_attribute(("first", "1")) + .new_line() + .new_line() + .with_attribute(("second", "2")) + .write_empty() + .expect("write tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + r#""# + ); + } + + #[test] + fn long_element_name() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b't', 1); + + writer + .create_element(String::from("x").repeat(128).as_str()) + .with_attribute(("first", "1")) + .new_line() + .with_attribute(("second", "2")) + .write_empty() + .expect("Problem with indentation reference"); + } +} + +mod in_attributes_multi { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn newline_first() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); + + writer + .create_element("element") + .new_line() + .with_attributes([("first", "1"), ("second", "2")]) + .new_line() + .with_attributes([("third", "3"), ("fourth", "4")]) + .write_empty() + .expect("write tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + "" + ); + } + + #[test] + fn newline_inside() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); + + writer + .create_element("element") + .with_attributes([("first", "1"), ("second", "2")]) + .new_line() + .with_attributes([("third", "3"), ("fourth", "4")]) + .write_empty() + .expect("write tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + r#""# + ); + } + + #[test] + fn newline_last() { + let mut buffer = Vec::new(); + let mut writer = Writer::new_with_indent(&mut buffer, b'_', 1); + + writer + .create_element("element") + .new_line() + .with_attributes([("first", "1"), ("second", "2")]) + .new_line() + .with_attributes([("third", "3"), ("fourth", "4")]) + .new_line() + .write_empty() + .expect("write tag failed"); + + writer + .create_element("element") + .with_attributes([("first", "1"), ("second", "2")]) + .new_line() + .with_attributes([("third", "3"), ("fourth", "4")]) + .new_line() + .write_empty() + .expect("write tag failed"); + + assert_eq!( + std::str::from_utf8(&buffer).unwrap(), + "\ + \n" + ); + } +}