From 4f0ffacc590c9aff2b5c99b8257dc2ef06357c25 Mon Sep 17 00:00:00 2001 From: Mingun Date: Tue, 18 Jun 2024 20:09:01 +0500 Subject: [PATCH 01/26] Add doctests for Deref implementations of events --- src/events/mod.rs | 154 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 148 insertions(+), 6 deletions(-) diff --git a/src/events/mod.rs b/src/events/mod.rs index 5dd22f45..654165d8 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -56,13 +56,35 @@ use crate::utils::CowRef; use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string}; use attributes::{Attribute, Attributes}; -/// Opening tag data (`Event::Start`), with optional attributes. -/// -/// `<name attr="value">`. +/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`. /// /// The name can be accessed using the [`name`] or [`local_name`] methods. /// An iterator over the attributes is returned by the [`attributes`] method. /// +/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation +/// returns the content of this event between `<` and `>` or `/>`: +/// +/// ``` +/// # use quick_xml::events::{BytesStart, Event}; +/// # use quick_xml::reader::Reader; +/// # use pretty_assertions::assert_eq; +/// // Remember, that \ at the end of string literal strips +/// // all space characters to the first non-space character +/// let mut reader = Reader::from_str("\ +/// <element a1 = 'val1' a2=\"val2\" />\ +/// <element a1 = 'val1' a2=\"val2\" >" +/// ); +/// let content = "element a1 = 'val1' a2=\"val2\" "; +/// let event = BytesStart::from_content(content, 7); +/// +/// assert_eq!(reader.read_event().unwrap(), Event::Empty(event.borrow())); +/// assert_eq!(reader.read_event().unwrap(), Event::Start(event.borrow())); +/// // deref coercion of &BytesStart to &[u8] +/// assert_eq!(&event as &[u8], content.as_bytes()); +/// // AsRef<[u8]> for &T + deref coercion +/// assert_eq!(event.as_ref(), content.as_bytes()); +/// ``` +/// /// [`name`]: Self::name /// [`local_name`]: Self::local_name /// [`attributes`]: Self::attributes @@ -351,6 +373,26 @@ impl<'a> 
arbitrary::Arbitrary<'a> for BytesStart<'a> { /// An XML declaration (`Event::Decl`). /// /// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd) +/// +/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation +/// returns the content of this event between `<?` and `?>`. +/// +/// Note, that inner text will not contain `?>` sequence inside: +/// +/// ``` +/// # use quick_xml::events::{BytesDecl, BytesStart, Event}; +/// # use quick_xml::reader::Reader; +/// # use pretty_assertions::assert_eq; +/// let mut reader = Reader::from_str("<?xml version = '1.0' ?>"); +/// let content = "xml version = '1.0' "; +/// let event = BytesDecl::from_start(BytesStart::from_content(content, 3)); +/// +/// assert_eq!(reader.read_event().unwrap(), Event::Decl(event.borrow())); +/// // deref coercion of &BytesDecl to &[u8] +/// assert_eq!(&event as &[u8], content.as_bytes()); +/// // AsRef<[u8]> for &T + deref coercion +/// assert_eq!(event.as_ref(), content.as_bytes()); +/// ``` #[derive(Clone, Debug, Eq, PartialEq)] pub struct BytesDecl<'a> { content: BytesStart<'a>, @@ -611,7 +653,38 @@ impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> { //////////////////////////////////////////////////////////////////////////////////////////////////// -/// A struct to manage `Event::End` events +/// Closing tag data (`Event::End`): `</name>`. +/// +/// The name can be accessed using the [`name`] or [`local_name`] methods. +/// +/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation +/// returns the content of this event between `</` and `>`. 
+/// +/// Note, that inner text will not contain `>` character inside: +/// +/// ``` +/// # use quick_xml::events::{BytesEnd, Event}; +/// # use quick_xml::reader::Reader; +/// # use pretty_assertions::assert_eq; +/// let mut reader = Reader::from_str(r#"<element></element a1 = 'val1' a2="val2" >"#); +/// // Note, that this entire string considered as a .name() +/// let content = "element a1 = 'val1' a2=\"val2\" "; +/// let event = BytesEnd::new(content); +/// +/// reader.config_mut().trim_markup_names_in_closing_tags = false; +/// reader.config_mut().check_end_names = false; +/// reader.read_event().unwrap(); // Skip `<element>` +/// +/// assert_eq!(reader.read_event().unwrap(), Event::End(event.borrow())); +/// assert_eq!(event.name().as_ref(), content.as_bytes()); +/// // deref coercion of &BytesEnd to &[u8] +/// assert_eq!(&event as &[u8], content.as_bytes()); +/// // AsRef<[u8]> for &T + deref coercion +/// assert_eq!(event.as_ref(), content.as_bytes()); +/// ``` +/// +/// [`name`]: Self::name +/// [`local_name`]: Self::local_name #[derive(Clone, Eq, PartialEq)] pub struct BytesEnd<'a> { name: Cow<'a, [u8]>, @@ -701,7 +774,36 @@ impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> { //////////////////////////////////////////////////////////////////////////////////////////////////// /// Data from various events (most notably, `Event::Text`) that stored in XML -/// in escaped form. Internally data is stored in escaped form +/// in escaped form. Internally data is stored in escaped form. +/// +/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation +/// returns the content of this event. In case of comment this is everything +/// between `<!--` and `-->` and the text of comment will not contain `-->` inside. +/// In case of DTD this is everything between `<!DOCTYPE` + spaces and closing `>` +/// (i.e. 
in case of DTD the first character is never space): +/// +/// ``` +/// # use quick_xml::events::{BytesText, Event}; +/// # use quick_xml::reader::Reader; +/// # use pretty_assertions::assert_eq; +/// // Remember, that \ at the end of string literal strips +/// // all space characters to the first non-space character +/// let mut reader = Reader::from_str("\ +/// <!DOCTYPE comment or text >\ +/// comment or text \ +/// <!--comment or text -->" +/// ); +/// let content = "comment or text "; +/// let event = BytesText::new(content); +/// +/// assert_eq!(reader.read_event().unwrap(), Event::DocType(event.borrow())); +/// assert_eq!(reader.read_event().unwrap(), Event::Text(event.borrow())); +/// assert_eq!(reader.read_event().unwrap(), Event::Comment(event.borrow())); +/// // deref coercion of &BytesText to &[u8] +/// assert_eq!(&event as &[u8], content.as_bytes()); +/// // AsRef<[u8]> for &T + deref coercion +/// assert_eq!(event.as_ref(), content.as_bytes()); +/// ``` #[derive(Clone, Eq, PartialEq)] pub struct BytesText<'a> { /// Escaped then encoded content of the event. Content is encoded in the XML @@ -843,7 +945,27 @@ impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> { //////////////////////////////////////////////////////////////////////////////////////////////////// /// CDATA content contains unescaped data from the reader. If you want to write them as a text, -/// [convert](Self::escape) it to [`BytesText`] +/// [convert](Self::escape) it to [`BytesText`]. +/// +/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation +/// returns the content of this event between `<![CDATA[` and `]]>`. 
+/// +/// Note, that inner text will not contain `]]>` sequence inside: +/// +/// ``` +/// # use quick_xml::events::{BytesCData, Event}; +/// # use quick_xml::reader::Reader; +/// # use pretty_assertions::assert_eq; +/// let mut reader = Reader::from_str("<![CDATA[ CDATA section ]]>"); +/// let content = " CDATA section "; +/// let event = BytesCData::new(content); +/// +/// assert_eq!(reader.read_event().unwrap(), Event::CData(event.borrow())); +/// // deref coercion of &BytesCData to &[u8] +/// assert_eq!(&event as &[u8], content.as_bytes()); +/// // AsRef<[u8]> for &T + deref coercion +/// assert_eq!(event.as_ref(), content.as_bytes()); +/// ``` #[derive(Clone, Eq, PartialEq)] pub struct BytesCData<'a> { content: Cow<'a, [u8]>, @@ -1009,6 +1131,26 @@ impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> { /// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications. /// +/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation +/// returns the content of this event between `<?` and `?>`. 
+/// +/// Note, that inner text will not contain `?>` sequence inside: +/// +/// ``` +/// # use quick_xml::events::{BytesPI, Event}; +/// # use quick_xml::reader::Reader; +/// # use pretty_assertions::assert_eq; +/// let mut reader = Reader::from_str("<?processing instruction >:-<~ ?>"); +/// let content = "processing instruction >:-<~ "; +/// let event = BytesPI::new(content); +/// +/// assert_eq!(reader.read_event().unwrap(), Event::PI(event.borrow())); +/// // deref coercion of &BytesPI to &[u8] +/// assert_eq!(&event as &[u8], content.as_bytes()); +/// // AsRef<[u8]> for &T + deref coercion +/// assert_eq!(event.as_ref(), content.as_bytes()); +/// ``` +/// /// [PI]: https://www.w3.org/TR/xml11/#sec-pi #[derive(Clone, Eq, PartialEq)] pub struct BytesPI<'a> { From 624891d6884b8106bec39466055e6fb9a2c163a5 Mon Sep 17 00:00:00 2001 From: Mingun Date: Tue, 18 Jun 2024 21:56:22 +0500 Subject: [PATCH 02/26] Move definition of BytesDecl after BytesPI It was between BytesStart and BytesEnd which logically is not correct. It is logically based on BytesPI, because this is just a special processing instruction, for the XML processor itself --- src/events/mod.rs | 730 +++++++++++++++++++++++----------------------- 1 file changed, 365 insertions(+), 365 deletions(-) diff --git a/src/events/mod.rs b/src/events/mod.rs index 654165d8..79df7e05 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -370,289 +370,6 @@ impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> { } //////////////////////////////////////////////////////////////////////////////////////////////////// -/// An XML declaration (`Event::Decl`). -/// -/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd) -/// -/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation -/// returns the content of this event between `<?` and `?>`. 
-/// -/// Note, that inner text will not contain `?>` sequence inside: -/// -/// ``` -/// # use quick_xml::events::{BytesDecl, BytesStart, Event}; -/// # use quick_xml::reader::Reader; -/// # use pretty_assertions::assert_eq; -/// let mut reader = Reader::from_str("<?xml version = '1.0' ?>"); -/// let content = "xml version = '1.0' "; -/// let event = BytesDecl::from_start(BytesStart::from_content(content, 3)); -/// -/// assert_eq!(reader.read_event().unwrap(), Event::Decl(event.borrow())); -/// // deref coercion of &BytesDecl to &[u8] -/// assert_eq!(&event as &[u8], content.as_bytes()); -/// // AsRef<[u8]> for &T + deref coercion -/// assert_eq!(event.as_ref(), content.as_bytes()); -/// ``` -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct BytesDecl<'a> { - content: BytesStart<'a>, -} - -impl<'a> BytesDecl<'a> { - /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`), - /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`) - /// attribute. - /// - /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values. - /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since - /// the double quote character is not allowed in any of the attribute values. 
- pub fn new( - version: &str, - encoding: Option<&str>, - standalone: Option<&str>, - ) -> BytesDecl<'static> { - // Compute length of the buffer based on supplied attributes - // ' encoding=""' => 12 - let encoding_attr_len = if let Some(xs) = encoding { - 12 + xs.len() - } else { - 0 - }; - // ' standalone=""' => 14 - let standalone_attr_len = if let Some(xs) = standalone { - 14 + xs.len() - } else { - 0 - }; - // 'xml version=""' => 14 - let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len); - - buf.push_str("xml version=\""); - buf.push_str(version); - - if let Some(encoding_val) = encoding { - buf.push_str("\" encoding=\""); - buf.push_str(encoding_val); - } - - if let Some(standalone_val) = standalone { - buf.push_str("\" standalone=\""); - buf.push_str(standalone_val); - } - buf.push('"'); - - BytesDecl { - content: BytesStart::from_content(buf, 3), - } - } - - /// Creates a `BytesDecl` from a `BytesStart` - pub const fn from_start(start: BytesStart<'a>) -> Self { - Self { content: start } - } - - /// Gets xml version, excluding quotes (`'` or `"`). - /// - /// According to the [grammar], the version *must* be the first thing in the declaration. - /// This method tries to extract the first thing in the declaration and return it. - /// In case of multiple attributes value of the first one is returned. - /// - /// If version is missed in the declaration, or the first thing is not a version, - /// [`IllFormedError::MissingDeclVersion`] will be returned. 
- /// - /// # Examples - /// - /// ``` - /// use quick_xml::errors::{Error, IllFormedError}; - /// use quick_xml::events::{BytesDecl, BytesStart}; - /// - /// // - /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0)); - /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref()); - /// - /// // - /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0)); - /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref()); - /// - /// // - /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0)); - /// match decl.version() { - /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"), - /// _ => assert!(false), - /// } - /// - /// // - /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0)); - /// match decl.version() { - /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"), - /// _ => assert!(false), - /// } - /// - /// // - /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0)); - /// match decl.version() { - /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {}, - /// _ => assert!(false), - /// } - /// ``` - /// - /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl - pub fn version(&self) -> Result> { - // The version *must* be the first thing in the declaration. - match self.content.attributes().with_checks(false).next() { - Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value), - // first attribute was not "version" - Some(Ok(a)) => { - let found = from_utf8(a.key.as_ref())?.to_string(); - Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some( - found, - )))) - } - // error parsing attributes - Some(Err(e)) => Err(e.into()), - // no attributes - None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))), - } - } - - /// Gets xml encoding, excluding quotes (`'` or `"`). 
- /// - /// Although according to the [grammar] encoding must appear before `"standalone"` - /// and after `"version"`, this method does not check that. The first occurrence - /// of the attribute will be returned even if there are several. Also, method does - /// not restrict symbols that can forming the encoding, so the returned encoding - /// name may not correspond to the grammar. - /// - /// # Examples - /// - /// ``` - /// use std::borrow::Cow; - /// use quick_xml::Error; - /// use quick_xml::events::{BytesDecl, BytesStart}; - /// - /// // - /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0)); - /// assert!(decl.encoding().is_none()); - /// - /// // - /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0)); - /// match decl.encoding() { - /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"), - /// _ => assert!(false), - /// } - /// - /// // - /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0)); - /// match decl.encoding() { - /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"), - /// _ => assert!(false), - /// } - /// ``` - /// - /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl - pub fn encoding(&self) -> Option>> { - self.content - .try_get_attribute("encoding") - .map(|a| a.map(|a| a.value)) - .transpose() - } - - /// Gets xml standalone, excluding quotes (`'` or `"`). - /// - /// Although according to the [grammar] standalone flag must appear after `"version"` - /// and `"encoding"`, this method does not check that. The first occurrence of the - /// attribute will be returned even if there are several. Also, method does not - /// restrict symbols that can forming the value, so the returned flag name may not - /// correspond to the grammar. 
- /// - /// # Examples - /// - /// ``` - /// use std::borrow::Cow; - /// use quick_xml::Error; - /// use quick_xml::events::{BytesDecl, BytesStart}; - /// - /// // - /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0)); - /// assert!(decl.standalone().is_none()); - /// - /// // - /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0)); - /// match decl.standalone() { - /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"), - /// _ => assert!(false), - /// } - /// - /// // - /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0)); - /// match decl.standalone() { - /// Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"), - /// _ => assert!(false), - /// } - /// ``` - /// - /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl - pub fn standalone(&self) -> Option>> { - self.content - .try_get_attribute("standalone") - .map(|a| a.map(|a| a.value)) - .transpose() - } - - /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get) - /// algorithm. - /// - /// If encoding in not known, or `encoding` key was not found, returns `None`. - /// In case of duplicated `encoding` key, encoding, corresponding to the first - /// one, is returned. - #[cfg(feature = "encoding")] - pub fn encoder(&self) -> Option<&'static Encoding> { - self.encoding() - .and_then(|e| e.ok()) - .and_then(|e| Encoding::for_label(&e)) - } - - /// Converts the event into an owned event. - pub fn into_owned(self) -> BytesDecl<'static> { - BytesDecl { - content: self.content.into_owned(), - } - } - - /// Converts the event into a borrowed event. 
- #[inline] - pub fn borrow(&self) -> BytesDecl { - BytesDecl { - content: self.content.borrow(), - } - } -} - -impl<'a> Deref for BytesDecl<'a> { - type Target = [u8]; - - fn deref(&self) -> &[u8] { - &self.content - } -} - -#[cfg(feature = "arbitrary")] -impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> { - fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result { - Ok(Self::new( - <&str>::arbitrary(u)?, - Option::<&str>::arbitrary(u)?, - Option::<&str>::arbitrary(u)?, - )) - } - - fn size_hint(depth: usize) -> (usize, Option) { - return <&str as arbitrary::Arbitrary>::size_hint(depth); - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - /// Closing tag data (`Event::End`): ``. /// /// The name can be accessed using the [`name`] or [`local_name`] methods. @@ -1166,121 +883,399 @@ impl<'a> BytesPI<'a> { } } - /// Creates a new `BytesPI` from a string. - /// - /// # Warning - /// - /// `content` must not contain the `?>` sequence. - #[inline] - pub fn new>>(content: C) -> Self { - let buf = str_cow_to_bytes(content); - let name_len = name_len(&buf); - Self { - content: BytesStart { buf, name_len }, + /// Creates a new `BytesPI` from a string. + /// + /// # Warning + /// + /// `content` must not contain the `?>` sequence. + #[inline] + pub fn new>>(content: C) -> Self { + let buf = str_cow_to_bytes(content); + let name_len = name_len(&buf); + Self { + content: BytesStart { buf, name_len }, + } + } + + /// Ensures that all data is owned to extend the object's lifetime if + /// necessary. + #[inline] + pub fn into_owned(self) -> BytesPI<'static> { + BytesPI { + content: self.content.into_owned().into(), + } + } + + /// Extracts the inner `Cow` from the `BytesPI` event container. + #[inline] + pub fn into_inner(self) -> Cow<'a, [u8]> { + self.content.buf + } + + /// Converts the event into a borrowed event. 
+ #[inline] + pub fn borrow(&self) -> BytesPI { + BytesPI { + content: self.content.borrow(), + } + } + + /// A target used to identify the application to which the instruction is directed. + /// + /// # Example + /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// use quick_xml::events::BytesPI; + /// + /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#); + /// assert_eq!(instruction.target(), b"xml-stylesheet"); + /// ``` + #[inline] + pub fn target(&self) -> &[u8] { + self.content.name().0 + } + + /// Content of the processing instruction. Contains everything between target + /// name and the end of the instruction. A direct consequence is that the first + /// character is always a space character. + /// + /// # Example + /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// use quick_xml::events::BytesPI; + /// + /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#); + /// assert_eq!(instruction.content(), br#" href="style.css""#); + /// ``` + #[inline] + pub fn content(&self) -> &[u8] { + self.content.attributes_raw() + } + + /// A view of the processing instructions' content as a list of key-value pairs. + /// + /// Key-value pairs are used in some processing instructions, for example in + /// ``. + /// + /// Returned iterator does not validate attribute values as may required by + /// target's rules. For example, it doesn't check that substring `?>` is not + /// present in the attribute value. That shouldn't be the problem when event + /// is produced by the reader, because reader detects end of processing instruction + /// by the first `?>` sequence, as required by the specification, and therefore + /// this sequence cannot appear inside it. 
+ /// + /// # Example + /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// use std::borrow::Cow; + /// use quick_xml::events::attributes::Attribute; + /// use quick_xml::events::BytesPI; + /// use quick_xml::name::QName; + /// + /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#); + /// for attr in instruction.attributes() { + /// assert_eq!(attr, Ok(Attribute { + /// key: QName(b"href"), + /// value: Cow::Borrowed(b"style.css"), + /// })); + /// } + /// ``` + #[inline] + pub fn attributes(&self) -> Attributes { + self.content.attributes() + } +} + +impl<'a> Debug for BytesPI<'a> { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "BytesPI {{ content: ")?; + write_cow_string(f, &self.content.buf)?; + write!(f, " }}") + } +} + +impl<'a> Deref for BytesPI<'a> { + type Target = [u8]; + + fn deref(&self) -> &[u8] { + &self.content + } +} + +#[cfg(feature = "arbitrary")] +impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> { + fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result { + Ok(Self::new(<&str>::arbitrary(u)?)) + } + fn size_hint(depth: usize) -> (usize, Option) { + return <&str as arbitrary::Arbitrary>::size_hint(depth); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// An XML declaration (`Event::Decl`). +/// +/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd) +/// +/// This event implements `Deref`. The `deref()` implementation +/// returns the content of this event between ``. 
+/// +/// Note, that inner text will not contain `?>` sequence inside: +/// +/// ``` +/// # use quick_xml::events::{BytesDecl, BytesStart, Event}; +/// # use quick_xml::reader::Reader; +/// # use pretty_assertions::assert_eq; +/// let mut reader = Reader::from_str("<?xml version = '1.0' ?>"); +/// let content = "xml version = '1.0' "; +/// let event = BytesDecl::from_start(BytesStart::from_content(content, 3)); +/// +/// assert_eq!(reader.read_event().unwrap(), Event::Decl(event.borrow())); +/// // deref coercion of &BytesDecl to &[u8] +/// assert_eq!(&event as &[u8], content.as_bytes()); +/// // AsRef<[u8]> for &T + deref coercion +/// assert_eq!(event.as_ref(), content.as_bytes()); +/// ``` +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct BytesDecl<'a> { + content: BytesStart<'a>, +} + +impl<'a> BytesDecl<'a> { + /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`), + /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`) + /// attribute. + /// + /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values. + /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since + /// the double quote character is not allowed in any of the attribute values. 
+ pub fn new( + version: &str, + encoding: Option<&str>, + standalone: Option<&str>, + ) -> BytesDecl<'static> { + // Compute length of the buffer based on supplied attributes + // ' encoding=""' => 12 + let encoding_attr_len = if let Some(xs) = encoding { + 12 + xs.len() + } else { + 0 + }; + // ' standalone=""' => 14 + let standalone_attr_len = if let Some(xs) = standalone { + 14 + xs.len() + } else { + 0 + }; + // 'xml version=""' => 14 + let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len); + + buf.push_str("xml version=\""); + buf.push_str(version); + + if let Some(encoding_val) = encoding { + buf.push_str("\" encoding=\""); + buf.push_str(encoding_val); } - } - /// Ensures that all data is owned to extend the object's lifetime if - /// necessary. - #[inline] - pub fn into_owned(self) -> BytesPI<'static> { - BytesPI { - content: self.content.into_owned().into(), + if let Some(standalone_val) = standalone { + buf.push_str("\" standalone=\""); + buf.push_str(standalone_val); } - } + buf.push('"'); - /// Extracts the inner `Cow` from the `BytesPI` event container. - #[inline] - pub fn into_inner(self) -> Cow<'a, [u8]> { - self.content.buf + BytesDecl { + content: BytesStart::from_content(buf, 3), + } } - /// Converts the event into a borrowed event. - #[inline] - pub fn borrow(&self) -> BytesPI { - BytesPI { - content: self.content.borrow(), - } + /// Creates a `BytesDecl` from a `BytesStart` + pub const fn from_start(start: BytesStart<'a>) -> Self { + Self { content: start } } - /// A target used to identify the application to which the instruction is directed. + /// Gets xml version, excluding quotes (`'` or `"`). /// - /// # Example + /// According to the [grammar], the version *must* be the first thing in the declaration. + /// This method tries to extract the first thing in the declaration and return it. + /// In case of multiple attributes value of the first one is returned. 
+ /// + /// If version is missed in the declaration, or the first thing is not a version, + /// [`IllFormedError::MissingDeclVersion`] will be returned. + /// + /// # Examples /// /// ``` - /// # use pretty_assertions::assert_eq; - /// use quick_xml::events::BytesPI; + /// use quick_xml::errors::{Error, IllFormedError}; + /// use quick_xml::events::{BytesDecl, BytesStart}; /// - /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#); - /// assert_eq!(instruction.target(), b"xml-stylesheet"); + /// // + /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0)); + /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref()); + /// + /// // + /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0)); + /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref()); + /// + /// // + /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0)); + /// match decl.version() { + /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"), + /// _ => assert!(false), + /// } + /// + /// // + /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0)); + /// match decl.version() { + /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"), + /// _ => assert!(false), + /// } + /// + /// // + /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0)); + /// match decl.version() { + /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {}, + /// _ => assert!(false), + /// } /// ``` - #[inline] - pub fn target(&self) -> &[u8] { - self.content.name().0 + /// + /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl + pub fn version(&self) -> Result> { + // The version *must* be the first thing in the declaration. 
+ match self.content.attributes().with_checks(false).next() { + Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value), + // first attribute was not "version" + Some(Ok(a)) => { + let found = from_utf8(a.key.as_ref())?.to_string(); + Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some( + found, + )))) + } + // error parsing attributes + Some(Err(e)) => Err(e.into()), + // no attributes + None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))), + } } - /// Content of the processing instruction. Contains everything between target - /// name and the end of the instruction. A direct consequence is that the first - /// character is always a space character. + /// Gets xml encoding, excluding quotes (`'` or `"`). /// - /// # Example + /// Although according to the [grammar] encoding must appear before `"standalone"` + /// and after `"version"`, this method does not check that. The first occurrence + /// of the attribute will be returned even if there are several. Also, method does + /// not restrict symbols that can forming the encoding, so the returned encoding + /// name may not correspond to the grammar. 
+ /// + /// # Examples /// /// ``` - /// # use pretty_assertions::assert_eq; - /// use quick_xml::events::BytesPI; + /// use std::borrow::Cow; + /// use quick_xml::Error; + /// use quick_xml::events::{BytesDecl, BytesStart}; /// - /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#); - /// assert_eq!(instruction.content(), br#" href="style.css""#); + /// // + /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0)); + /// assert!(decl.encoding().is_none()); + /// + /// // + /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0)); + /// match decl.encoding() { + /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"), + /// _ => assert!(false), + /// } + /// + /// // + /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0)); + /// match decl.encoding() { + /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"), + /// _ => assert!(false), + /// } /// ``` - #[inline] - pub fn content(&self) -> &[u8] { - self.content.attributes_raw() + /// + /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl + pub fn encoding(&self) -> Option>> { + self.content + .try_get_attribute("encoding") + .map(|a| a.map(|a| a.value)) + .transpose() } - /// A view of the processing instructions' content as a list of key-value pairs. - /// - /// Key-value pairs are used in some processing instructions, for example in - /// ``. + /// Gets xml standalone, excluding quotes (`'` or `"`). /// - /// Returned iterator does not validate attribute values as may required by - /// target's rules. For example, it doesn't check that substring `?>` is not - /// present in the attribute value. 
That shouldn't be the problem when event - /// is produced by the reader, because reader detects end of processing instruction - /// by the first `?>` sequence, as required by the specification, and therefore - /// this sequence cannot appear inside it. + /// Although according to the [grammar] standalone flag must appear after `"version"` + /// and `"encoding"`, this method does not check that. The first occurrence of the + /// attribute will be returned even if there are several. Also, method does not + /// restrict symbols that can forming the value, so the returned flag name may not + /// correspond to the grammar. /// - /// # Example + /// # Examples /// /// ``` - /// # use pretty_assertions::assert_eq; /// use std::borrow::Cow; - /// use quick_xml::events::attributes::Attribute; - /// use quick_xml::events::BytesPI; - /// use quick_xml::name::QName; + /// use quick_xml::Error; + /// use quick_xml::events::{BytesDecl, BytesStart}; /// - /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#); - /// for attr in instruction.attributes() { - /// assert_eq!(attr, Ok(Attribute { - /// key: QName(b"href"), - /// value: Cow::Borrowed(b"style.css"), - /// })); + /// // + /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0)); + /// assert!(decl.standalone().is_none()); + /// + /// // + /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0)); + /// match decl.standalone() { + /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"), + /// _ => assert!(false), + /// } + /// + /// // + /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0)); + /// match decl.standalone() { + /// Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"), + /// _ => assert!(false), /// } /// ``` - #[inline] - pub fn attributes(&self) -> Attributes { - self.content.attributes() + /// + /// [grammar]: 
https://www.w3.org/TR/xml11/#NT-XMLDecl + pub fn standalone(&self) -> Option>> { + self.content + .try_get_attribute("standalone") + .map(|a| a.map(|a| a.value)) + .transpose() } -} -impl<'a> Debug for BytesPI<'a> { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "BytesPI {{ content: ")?; - write_cow_string(f, &self.content.buf)?; - write!(f, " }}") + /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get) + /// algorithm. + /// + /// If encoding is not known, or `encoding` key was not found, returns `None`. + /// In case of duplicated `encoding` key, encoding, corresponding to the first + /// one, is returned. + #[cfg(feature = "encoding")] + pub fn encoder(&self) -> Option<&'static Encoding> { + self.encoding() + .and_then(|e| e.ok()) + .and_then(|e| Encoding::for_label(&e)) + } + + /// Converts the event into an owned event. + pub fn into_owned(self) -> BytesDecl<'static> { + BytesDecl { + content: self.content.into_owned(), + } + } + + /// Converts the event into a borrowed event. 
+ #[inline] + pub fn borrow(&self) -> BytesDecl { + BytesDecl { + content: self.content.borrow(), + } } } -impl<'a> Deref for BytesPI<'a> { +impl<'a> Deref for BytesDecl<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { @@ -1289,10 +1284,15 @@ impl<'a> Deref for BytesPI<'a> { } #[cfg(feature = "arbitrary")] -impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> { +impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> { fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result { - Ok(Self::new(<&str>::arbitrary(u)?)) + Ok(Self::new( + <&str>::arbitrary(u)?, + Option::<&str>::arbitrary(u)?, + Option::<&str>::arbitrary(u)?, + )) } + fn size_hint(depth: usize) -> (usize, Option) { return <&str as arbitrary::Arbitrary>::size_hint(depth); } From 005937704b457568196a1728374b62dbd1ef7f10 Mon Sep 17 00:00:00 2001 From: Mingun Date: Tue, 18 Jun 2024 17:54:07 +0500 Subject: [PATCH 03/26] Move all writer-only tests from tests/unit_tests.rs to tests/writer.rs Moved tests: - test_new_xml_decl_full -> declaration::full - test_new_xml_decl_standalone -> declaration::standalone - test_new_xml_decl_encoding -> declaration::encoding - test_new_xml_decl_version -> declaration::version - test_new_xml_decl_empty -> declaration::empty --- tests/unit_tests.rs | 80 +--------------------------------------- tests/writer.rs | 89 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 79 deletions(-) create mode 100644 tests/writer.rs diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index 954b90a2..0e5b4b54 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -4,7 +4,7 @@ use std::str::from_utf8; use quick_xml::events::attributes::{AttrError, Attribute}; use quick_xml::events::Event::*; -use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText}; +use quick_xml::events::{BytesEnd, BytesStart, BytesText}; use quick_xml::name::QName; use quick_xml::reader::Reader; use quick_xml::writer::Writer; @@ -288,84 +288,6 @@ fn test_write_attrs() 
-> Result<()> { Ok(()) } -#[test] -fn test_new_xml_decl_full() { - let mut writer = Writer::new(Vec::new()); - writer - .write_event(Decl(BytesDecl::new("1.2", Some("utf-X"), Some("yo")))) - .expect("writing xml decl should succeed"); - - let result = writer.into_inner(); - assert_eq!( - String::from_utf8(result).expect("utf-8 output"), - "", - "writer output (LHS)" - ); -} - -#[test] -fn test_new_xml_decl_standalone() { - let mut writer = Writer::new(Vec::new()); - writer - .write_event(Decl(BytesDecl::new("1.2", None, Some("yo")))) - .expect("writing xml decl should succeed"); - - let result = writer.into_inner(); - assert_eq!( - String::from_utf8(result).expect("utf-8 output"), - "", - "writer output (LHS)" - ); -} - -#[test] -fn test_new_xml_decl_encoding() { - let mut writer = Writer::new(Vec::new()); - writer - .write_event(Decl(BytesDecl::new("1.2", Some("utf-X"), None))) - .expect("writing xml decl should succeed"); - - let result = writer.into_inner(); - assert_eq!( - String::from_utf8(result).expect("utf-8 output"), - "", - "writer output (LHS)" - ); -} - -#[test] -fn test_new_xml_decl_version() { - let mut writer = Writer::new(Vec::new()); - writer - .write_event(Decl(BytesDecl::new("1.2", None, None))) - .expect("writing xml decl should succeed"); - - let result = writer.into_inner(); - assert_eq!( - String::from_utf8(result).expect("utf-8 output"), - "", - "writer output (LHS)" - ); -} - -/// This test ensures that empty XML declaration attribute values are not a problem. -#[test] -fn test_new_xml_decl_empty() { - let mut writer = Writer::new(Vec::new()); - // An empty version should arguably be an error, but we don't expect anyone to actually supply - // an empty version. 
- writer - .write_event(Decl(BytesDecl::new("", Some(""), Some("")))) - .expect("writing xml decl should succeed"); - - let result = writer.into_inner(); - assert_eq!( - String::from_utf8(result).expect("utf-8 output"), - "", - "writer output (LHS)" - ); -} - #[test] fn test_escaped_content() { let mut r = Reader::from_str("<test>"); diff --git a/tests/writer.rs b/tests/writer.rs new file mode 100644 index 00000000..6cf184f1 --- /dev/null +++ b/tests/writer.rs @@ -0,0 +1,89 @@ +use quick_xml::events::{BytesDecl, Event::*}; +use quick_xml::writer::Writer; + +mod declaration { + use super::*; + use pretty_assertions::assert_eq; + + /// Written: version, encoding, standalone + #[test] + fn full() { + let mut writer = Writer::new(Vec::new()); + writer + .write_event(Decl(BytesDecl::new("1.2", Some("utf-X"), Some("yo")))) + .expect("writing xml decl should succeed"); + + let result = writer.into_inner(); + assert_eq!( + String::from_utf8(result).expect("utf-8 output"), + "", + "writer output (LHS)" + ); + } + + /// Written: version, standalone + #[test] + fn standalone() { + let mut writer = Writer::new(Vec::new()); + writer + .write_event(Decl(BytesDecl::new("1.2", None, Some("yo")))) + .expect("writing xml decl should succeed"); + + let result = writer.into_inner(); + assert_eq!( + String::from_utf8(result).expect("utf-8 output"), + "", + "writer output (LHS)" + ); + } + + /// Written: version, encoding + #[test] + fn encoding() { + let mut writer = Writer::new(Vec::new()); + writer + .write_event(Decl(BytesDecl::new("1.2", Some("utf-X"), None))) + .expect("writing xml decl should succeed"); + + let result = writer.into_inner(); + assert_eq!( + String::from_utf8(result).expect("utf-8 output"), + "", + "writer output (LHS)" + ); + } + + /// Written: version + #[test] + fn version() { + let mut writer = Writer::new(Vec::new()); + writer + .write_event(Decl(BytesDecl::new("1.2", None, None))) + .expect("writing xml decl should succeed"); + + let result = 
writer.into_inner(); + assert_eq!( + String::from_utf8(result).expect("utf-8 output"), + "", + "writer output (LHS)" + ); + } + + /// This test ensures that empty XML declaration attribute values are not a problem. + #[test] + fn empty() { + let mut writer = Writer::new(Vec::new()); + // An empty version should arguably be an error, but we don't expect anyone to actually supply + // an empty version. + writer + .write_event(Decl(BytesDecl::new("", Some(""), Some("")))) + .expect("writing xml decl should succeed"); + + let result = writer.into_inner(); + assert_eq!( + String::from_utf8(result).expect("utf-8 output"), + "", + "writer output (LHS)" + ); + } +} From 1ebc3968fc08ca28110e52312aaff84ada2fa419 Mon Sep 17 00:00:00 2001 From: Mingun Date: Tue, 18 Jun 2024 22:27:20 +0500 Subject: [PATCH 04/26] Add base tests of event writing --- tests/writer.rs | 149 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 148 insertions(+), 1 deletion(-) diff --git a/tests/writer.rs b/tests/writer.rs index 6cf184f1..19777582 100644 --- a/tests/writer.rs +++ b/tests/writer.rs @@ -1,6 +1,10 @@ -use quick_xml::events::{BytesDecl, Event::*}; +use quick_xml::events::{ + BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event::*, +}; use quick_xml::writer::Writer; +use pretty_assertions::assert_eq; + mod declaration { use super::*; use pretty_assertions::assert_eq; @@ -87,3 +91,146 @@ mod declaration { ); } } + +#[test] +fn pi() { + let mut writer = Writer::new(Vec::new()); + writer + .write_event(PI(BytesPI::new("xml-stylesheet href='theme.xls' "))) + .expect("writing processing instruction should succeed"); + + let result = writer.into_inner(); + assert_eq!( + String::from_utf8(result).expect("utf-8 output"), + "", + "writer output (LHS)" + ); +} + +#[test] +fn empty() { + let mut writer = Writer::new(Vec::new()); + writer + .write_event(Empty( + BytesStart::new("game").with_attributes([("publisher", "Blizzard")]), + )) + .expect("writing empty tag should 
succeed"); + + let result = writer.into_inner(); + assert_eq!( + String::from_utf8(result).expect("utf-8 output"), + r#""#, + "writer output (LHS)" + ); +} + +#[test] +fn start() { + let mut writer = Writer::new(Vec::new()); + writer + .write_event(Start( + BytesStart::new("info").with_attributes([("genre", "RTS")]), + )) + .expect("writing start tag should succeed"); + + let result = writer.into_inner(); + assert_eq!( + String::from_utf8(result).expect("utf-8 output"), + r#""#, + "writer output (LHS)" + ); +} + +#[test] +fn end() { + let mut writer = Writer::new(Vec::new()); + writer + .write_event(End(BytesEnd::new("info"))) + .expect("writing end tag should succeed"); + + let result = writer.into_inner(); + assert_eq!( + String::from_utf8(result).expect("utf-8 output"), + "", + "writer output (LHS)" + ); +} + +#[test] +fn text() { + let mut writer = Writer::new(Vec::new()); + writer + .write_event(Text(BytesText::new( + "Kerrigan & Raynor: The Z[erg] programming language", + ))) + .expect("writing text should succeed"); + + let result = writer.into_inner(); + assert_eq!( + String::from_utf8(result).expect("utf-8 output"), + "Kerrigan & Raynor: The Z[erg] programming language", + "writer output (LHS)" + ); +} + +#[test] +fn cdata() { + let mut writer = Writer::new(Vec::new()); + writer + .write_event(CData(BytesCData::new( + "Kerrigan & Raynor: The Z[erg] programming language", + ))) + .expect("writing CDATA section should succeed"); + + let result = writer.into_inner(); + assert_eq!( + String::from_utf8(result).expect("utf-8 output"), + "", + "writer output (LHS)" + ); +} + +#[test] +fn comment() { + let mut writer = Writer::new(Vec::new()); + writer + .write_event(Comment(BytesText::from_escaped( + "Kerrigan & Raynor: The Z[erg] programming language", + ))) + .expect("writing comment should succeed"); + + let result = writer.into_inner(); + assert_eq!( + String::from_utf8(result).expect("utf-8 output"), + "", + "writer output (LHS)" + ); +} + +#[test] +fn 
doctype() { + let mut writer = Writer::new(Vec::new()); + writer + .write_event(DocType(BytesText::new("some DTD here..."))) + .expect("writing DTD should succeed"); + + let result = writer.into_inner(); + assert_eq!( + String::from_utf8(result).expect("utf-8 output"), + "", + "writer output (LHS)" + ); +} + +#[test] +fn eof() { + let mut writer = Writer::new(Vec::new()); + writer.write_event(Eof).expect("writing EOF should succeed"); + + let result = writer.into_inner(); + assert_eq!( + String::from_utf8(result).expect("utf-8 output"), + "", + "writer output (LHS)" + ); +} From 1b6a0e61b3c77944c95de4009cd7ea3ca2ee33ad Mon Sep 17 00:00:00 2001 From: Mingun Date: Tue, 18 Jun 2024 18:35:06 +0500 Subject: [PATCH 05/26] Remove duplicated roundtrip test `test_read_write_roundtrip_results_in_identity` the same as `test_read_write_roundtrip` except that it does not contain escaped data in the text --- tests/unit_tests.rs | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index 0e5b4b54..2ce30792 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -320,31 +320,6 @@ fn test_escaped_content() { next_eq!(r, End, b"a"); } -#[test] -fn test_read_write_roundtrip_results_in_identity() -> Result<()> { - let input = r#" - -
-
-
-
data
-
- "#; - - let mut reader = Reader::from_str(input); - let mut writer = Writer::new(Cursor::new(Vec::new())); - loop { - match reader.read_event()? { - Eof => break, - e => assert!(writer.write_event(e).is_ok()), - } - } - - let result = writer.into_inner().into_inner(); - assert_eq!(String::from_utf8(result).unwrap(), input); - Ok(()) -} - #[test] fn test_read_write_roundtrip() -> Result<()> { let input = r#" From 37852e00db9cfc1cbd3ace1b4faeb244dc877880 Mon Sep 17 00:00:00 2001 From: Mingun Date: Tue, 18 Jun 2024 18:53:25 +0500 Subject: [PATCH 06/26] Check documents after roundtrip as strings In case of errors diff will readable --- tests/unit_tests.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index 2ce30792..b5cf6e2e 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -178,7 +178,7 @@ fn test_writer() -> Result<()> { } let result = writer.into_inner().into_inner(); - assert_eq!(result, txt.as_bytes()); + assert_eq!(String::from_utf8(result).unwrap(), txt); Ok(()) } @@ -196,7 +196,7 @@ fn test_writer_borrow() -> Result<()> { } let result = writer.into_inner().into_inner(); - assert_eq!(result, txt.as_bytes()); + assert_eq!(String::from_utf8(result).unwrap(), txt); Ok(()) } @@ -214,8 +214,7 @@ fn test_writer_indent() -> Result<()> { } let result = writer.into_inner().into_inner(); - assert_eq!(result, txt.as_bytes()); - + assert_eq!(String::from_utf8(result).unwrap(), txt); Ok(()) } @@ -233,8 +232,7 @@ fn test_writer_indent_cdata() -> Result<()> { } let result = writer.into_inner().into_inner(); - assert_eq!(result, txt.as_bytes()); - + assert_eq!(String::from_utf8(result).unwrap(), txt); Ok(()) } @@ -283,8 +281,7 @@ fn test_write_attrs() -> Result<()> { } let result = writer.into_inner().into_inner(); - assert_eq!(result, expected.as_bytes()); - + assert_eq!(String::from_utf8(result).unwrap(), expected); Ok(()) } From 866e1cda0b1245838d8c67c06959b18e2863a23d Mon Sep 17 
00:00:00 2001 From: Mingun Date: Tue, 18 Jun 2024 18:42:15 +0500 Subject: [PATCH 07/26] Remove unnecessary wrapping in Cursor in roundtrip tests --- tests/unit_tests.rs | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index b5cf6e2e..9011761d 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -1,5 +1,4 @@ use std::borrow::Cow; -use std::io::Cursor; use std::str::from_utf8; use quick_xml::events::attributes::{AttrError, Attribute}; @@ -169,7 +168,7 @@ fn test_writer() -> Result<()> { let txt = include_str!("../tests/documents/test_writer.xml").trim(); let mut reader = Reader::from_str(txt); reader.config_mut().trim_text(true); - let mut writer = Writer::new(Cursor::new(Vec::new())); + let mut writer = Writer::new(Vec::new()); loop { match reader.read_event()? { Eof => break, @@ -177,7 +176,7 @@ fn test_writer() -> Result<()> { } } - let result = writer.into_inner().into_inner(); + let result = writer.into_inner(); assert_eq!(String::from_utf8(result).unwrap(), txt); Ok(()) } @@ -187,7 +186,7 @@ fn test_writer_borrow() -> Result<()> { let txt = include_str!("../tests/documents/test_writer.xml").trim(); let mut reader = Reader::from_str(txt); reader.config_mut().trim_text(true); - let mut writer = Writer::new(Cursor::new(Vec::new())); + let mut writer = Writer::new(Vec::new()); loop { match reader.read_event()? 
{ Eof => break, @@ -195,7 +194,7 @@ fn test_writer_borrow() -> Result<()> { } } - let result = writer.into_inner().into_inner(); + let result = writer.into_inner(); assert_eq!(String::from_utf8(result).unwrap(), txt); Ok(()) } @@ -205,7 +204,7 @@ fn test_writer_indent() -> Result<()> { let txt = include_str!("../tests/documents/test_writer_indent.xml"); let mut reader = Reader::from_str(txt); reader.config_mut().trim_text(true); - let mut writer = Writer::new_with_indent(Cursor::new(Vec::new()), b' ', 4); + let mut writer = Writer::new_with_indent(Vec::new(), b' ', 4); loop { match reader.read_event()? { Eof => break, @@ -213,7 +212,7 @@ fn test_writer_indent() -> Result<()> { } } - let result = writer.into_inner().into_inner(); + let result = writer.into_inner(); assert_eq!(String::from_utf8(result).unwrap(), txt); Ok(()) } @@ -223,7 +222,7 @@ fn test_writer_indent_cdata() -> Result<()> { let txt = include_str!("../tests/documents/test_writer_indent_cdata.xml"); let mut reader = Reader::from_str(txt); reader.config_mut().trim_text(true); - let mut writer = Writer::new_with_indent(Cursor::new(Vec::new()), b' ', 4); + let mut writer = Writer::new_with_indent(Vec::new(), b' ', 4); loop { match reader.read_event()? { Eof => break, @@ -231,7 +230,7 @@ fn test_writer_indent_cdata() -> Result<()> { } } - let result = writer.into_inner().into_inner(); + let result = writer.into_inner(); assert_eq!(String::from_utf8(result).unwrap(), txt); Ok(()) } @@ -241,7 +240,7 @@ fn test_write_empty_element_attrs() -> Result<()> { let str_from = r#""#; let expected = r#""#; let mut reader = Reader::from_str(str_from); - let mut writer = Writer::new(Cursor::new(Vec::new())); + let mut writer = Writer::new(Vec::new()); loop { match reader.read_event()? 
{ Eof => break, @@ -249,7 +248,7 @@ fn test_write_empty_element_attrs() -> Result<()> { } } - let result = writer.into_inner().into_inner(); + let result = writer.into_inner(); assert_eq!(String::from_utf8(result).unwrap(), expected); Ok(()) } @@ -262,7 +261,7 @@ fn test_write_attrs() -> Result<()> { let expected = r#""#; let mut reader = Reader::from_str(str_from); reader.config_mut().trim_text(true); - let mut writer = Writer::new(Cursor::new(Vec::new())); + let mut writer = Writer::new(Vec::new()); loop { let event = match reader.read_event()? { Eof => break, @@ -280,7 +279,7 @@ fn test_write_attrs() -> Result<()> { assert!(writer.write_event(event).is_ok()); } - let result = writer.into_inner().into_inner(); + let result = writer.into_inner(); assert_eq!(String::from_utf8(result).unwrap(), expected); Ok(()) } @@ -329,7 +328,7 @@ fn test_read_write_roundtrip() -> Result<()> { "#; let mut reader = Reader::from_str(input); - let mut writer = Writer::new(Cursor::new(Vec::new())); + let mut writer = Writer::new(Vec::new()); loop { match reader.read_event()? { Eof => break, @@ -337,7 +336,7 @@ fn test_read_write_roundtrip() -> Result<()> { } } - let result = writer.into_inner().into_inner(); + let result = writer.into_inner(); assert_eq!(String::from_utf8(result).unwrap(), input); Ok(()) } @@ -354,7 +353,7 @@ fn test_read_write_roundtrip_escape_text() -> Result<()> { "#; let mut reader = Reader::from_str(input); - let mut writer = Writer::new(Cursor::new(Vec::new())); + let mut writer = Writer::new(Vec::new()); loop { match reader.read_event()? 
{ Eof => break, @@ -366,7 +365,7 @@ fn test_read_write_roundtrip_escape_text() -> Result<()> { } } - let result = writer.into_inner().into_inner(); + let result = writer.into_inner(); assert_eq!(String::from_utf8(result).unwrap(), input); Ok(()) } From 837b9e268611f8796164855c84f2b0656110c067 Mon Sep 17 00:00:00 2001 From: Mingun Date: Tue, 18 Jun 2024 22:49:58 +0500 Subject: [PATCH 08/26] Remove unnecessary set of trim_text option This option already tested in tests/reader-config.rs, therefore, we will not overcomplicate the tests where it is not needed --- src/writer.rs | 1 - tests/namespaces.rs | 14 ++------------ tests/test.rs | 3 --- tests/unit_tests.rs | 17 ----------------- 4 files changed, 2 insertions(+), 33 deletions(-) diff --git a/src/writer.rs b/src/writer.rs index a1b6fa85..74a70317 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -28,7 +28,6 @@ use {crate::de::DeError, serde::Serialize}; /// /// let xml = r#"text"#; /// let mut reader = Reader::from_str(xml); -/// reader.config_mut().trim_text(true); /// let mut writer = Writer::new(Cursor::new(Vec::new())); /// loop { /// match reader.read_event() { diff --git a/tests/namespaces.rs b/tests/namespaces.rs index a50e2306..dc3d0185 100644 --- a/tests/namespaces.rs +++ b/tests/namespaces.rs @@ -9,7 +9,6 @@ use std::borrow::Cow; #[test] fn namespace() { let mut r = NsReader::from_str("in namespace!"); - r.config_mut().trim_text(true); // match r.read_resolved_event() { @@ -91,7 +90,6 @@ fn namespace() { #[test] fn default_namespace() { let mut r = NsReader::from_str(r#""#); - r.config_mut().trim_text(true); // match r.read_resolved_event() { @@ -149,7 +147,6 @@ fn default_namespace() { #[test] fn default_namespace_reset() { let mut r = NsReader::from_str(r#""#); - r.config_mut().trim_text(true); // match r.read_resolved_event() { @@ -211,7 +208,6 @@ fn attributes_empty_ns() { let src = ""; let mut r = NsReader::from_str(src); - r.config_mut().trim_text(true); let e = match r.read_resolved_event() { 
Ok((Unbound, Empty(e))) => e, @@ -257,9 +253,7 @@ fn attributes_empty_ns_expanded() { let src = ""; let mut r = NsReader::from_str(src); - let config = r.config_mut(); - config.trim_text(true); - config.expand_empty_elements = true; + r.config_mut().expand_empty_elements = true; { let e = match r.read_resolved_event() { Ok((Unbound, Start(e))) => e, @@ -308,7 +302,6 @@ fn default_ns_shadowing_empty() { let src = ""; let mut r = NsReader::from_str(src); - r.config_mut().trim_text(true); // { @@ -385,9 +378,7 @@ fn default_ns_shadowing_expanded() { let src = ""; let mut r = NsReader::from_str(src); - let config = r.config_mut(); - config.trim_text(true); - config.expand_empty_elements = true; + r.config_mut().expand_empty_elements = true; // { @@ -458,7 +449,6 @@ fn reserved_name() { // Name "xmlns-something" is reserved according to spec, because started with "xml" let mut r = NsReader::from_str(r#""#); - r.config_mut().trim_text(true); // match r.read_resolved_event() { diff --git a/tests/test.rs b/tests/test.rs index 58887b88..e72d61f8 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -26,7 +26,6 @@ fn test_sample() { fn test_attributes_empty() { let src = ""; let mut r = Reader::from_str(src); - r.config_mut().trim_text(true); match r.read_event() { Ok(Empty(e)) => { let mut attrs = e.attributes(); @@ -54,7 +53,6 @@ fn test_attributes_empty() { fn test_attribute_equal() { let src = ""; let mut r = Reader::from_str(src); - r.config_mut().trim_text(true); match r.read_event() { Ok(Empty(e)) => { let mut attrs = e.attributes(); @@ -74,7 +72,6 @@ fn test_attribute_equal() { #[test] fn test_clone_reader() { let mut reader = Reader::from_str("text"); - reader.config_mut().trim_text(true); assert!(matches!(reader.read_event().unwrap(), Start(_))); diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index 9011761d..77904f4e 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -55,28 +55,24 @@ macro_rules! 
next_eq { #[test] fn test_start_end() { let mut r = Reader::from_str(""); - r.config_mut().trim_text(true); next_eq!(r, Start, b"a", End, b"a"); } #[test] fn test_start_end_with_ws() { let mut r = Reader::from_str(""); - r.config_mut().trim_text(true); next_eq!(r, Start, b"a", End, b"a"); } #[test] fn test_start_end_attr() { let mut r = Reader::from_str(""); - r.config_mut().trim_text(true); next_eq!(r, Start, b"a", End, b"a"); } #[test] fn test_empty_attr() { let mut r = Reader::from_str(""); - r.config_mut().trim_text(true); next_eq!(r, Empty, b"a"); } @@ -90,21 +86,18 @@ fn test_start_end_comment() { #[test] fn test_start_txt_end() { let mut r = Reader::from_str("test"); - r.config_mut().trim_text(true); next_eq!(r, Start, b"a", Text, b"test", End, b"a"); } #[test] fn test_comment() { let mut r = Reader::from_str(""); - r.config_mut().trim_text(true); next_eq!(r, Comment, b"test"); } #[test] fn test_xml_decl() { let mut r = Reader::from_str(""); - r.config_mut().trim_text(true); match r.read_event().unwrap() { Decl(ref e) => { match e.version() { @@ -138,28 +131,24 @@ fn test_xml_decl() { #[test] fn test_cdata() { let mut r = Reader::from_str(""); - r.config_mut().trim_text(true); next_eq!(r, CData, b"test"); } #[test] fn test_cdata_open_close() { let mut r = Reader::from_str(" test]]>"); - r.config_mut().trim_text(true); next_eq!(r, CData, b"test <> test"); } #[test] fn test_start_attr() { let mut r = Reader::from_str(""); - r.config_mut().trim_text(true); next_eq!(r, Start, b"a"); } #[test] fn test_nested() { let mut r = Reader::from_str("test"); - r.config_mut().trim_text(true); next_eq!(r, Start, b"a", Start, b"b", Text, b"test", End, b"b", Empty, b"c", End, b"a"); } @@ -260,7 +249,6 @@ fn test_write_attrs() -> Result<()> { let str_from = r#""#; let expected = r#""#; let mut reader = Reader::from_str(str_from); - reader.config_mut().trim_text(true); let mut writer = Writer::new(Vec::new()); loop { let event = match reader.read_event()? 
{ @@ -287,7 +275,6 @@ fn test_write_attrs() -> Result<()> { #[test] fn test_escaped_content() { let mut r = Reader::from_str("<test>"); - r.config_mut().trim_text(true); next_eq!(r, Start, b"a"); match r.read_event() { Ok(Text(e)) => { @@ -373,7 +360,6 @@ fn test_read_write_roundtrip_escape_text() -> Result<()> { #[test] fn test_closing_bracket_in_single_quote_attr() { let mut r = Reader::from_str(""); - r.config_mut().trim_text(true); match r.read_event() { Ok(Start(e)) => { let mut attrs = e.attributes(); @@ -401,7 +387,6 @@ fn test_closing_bracket_in_single_quote_attr() { #[test] fn test_closing_bracket_in_double_quote_attr() { let mut r = Reader::from_str(r#""#); - r.config_mut().trim_text(true); match r.read_event() { Ok(Start(e)) => { let mut attrs = e.attributes(); @@ -429,7 +414,6 @@ fn test_closing_bracket_in_double_quote_attr() { #[test] fn test_closing_bracket_in_double_quote_mixed() { let mut r = Reader::from_str(r#""#); - r.config_mut().trim_text(true); match r.read_event() { Ok(Start(e)) => { let mut attrs = e.attributes(); @@ -457,7 +441,6 @@ fn test_closing_bracket_in_double_quote_mixed() { #[test] fn test_closing_bracket_in_single_quote_mixed() { let mut r = Reader::from_str(r#""#); - r.config_mut().trim_text(true); match r.read_event() { Ok(Start(e)) => { let mut attrs = e.attributes(); From c05ea3c2542d68c4fa3225f361776bb4c816d646 Mon Sep 17 00:00:00 2001 From: Mingun Date: Tue, 18 Jun 2024 18:28:12 +0500 Subject: [PATCH 09/26] Move all roundtrip tests from tests/unit_tests.rs to tests/roundtrip.rs Moved tests: - test_writer -> with_trim - test_writer_borrow -> with_trim_ref - test_writer_indent -> with_indent - test_writer_indent_cdata -> with_indent_cdata - test_write_empty_element_attrs -> events::empty - test_write_attrs -> partial_rewrite - test_read_write_roundtrip -> simple - test_read_write_roundtrip_escape_text -> reescape_text --- tests/roundtrip.rs | 189 ++++++++++++++++++++++++++++++++++++++++++++ tests/unit_tests.rs | 179 
+---------------------------------------- 2 files changed, 190 insertions(+), 178 deletions(-) create mode 100644 tests/roundtrip.rs diff --git a/tests/roundtrip.rs b/tests/roundtrip.rs new file mode 100644 index 00000000..1a6e5e2b --- /dev/null +++ b/tests/roundtrip.rs @@ -0,0 +1,189 @@ +//! Contains tests that check that writing events from a reader produces the same documents. + +use quick_xml::events::attributes::AttrError; +use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event::*}; +use quick_xml::reader::Reader; +use quick_xml::writer::Writer; +use quick_xml::Result; + +use pretty_assertions::assert_eq; + +mod events { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn empty() -> Result<()> { + let input = r#""#; + let mut reader = Reader::from_str(input); + let mut writer = Writer::new(Vec::new()); + loop { + match reader.read_event()? { + Eof => break, + e => assert!(writer.write_event(e).is_ok()), + } + } + + let result = writer.into_inner(); + assert_eq!(String::from_utf8(result).unwrap(), input); + Ok(()) + } +} + +/// Indent of the last tag mismatched intentionally +const XML: &str = r#" + +
+
+
+
data <escaped>
+
+ "#; + +/// Directly write event from reader without any processing. +#[test] +fn simple() -> Result<()> { + let mut reader = Reader::from_str(XML); + let mut writer = Writer::new(Vec::new()); + loop { + match reader.read_event()? { + Eof => break, + e => assert!(writer.write_event(e).is_ok()), + } + } + + let result = writer.into_inner(); + assert_eq!(String::from_utf8(result).unwrap(), XML); + Ok(()) +} + +/// Directly write event from reader without processing (except auto-trimming text). +#[test] +fn with_trim() -> Result<()> { + let input = include_str!("documents/test_writer.xml").trim(); + let mut reader = Reader::from_str(input); + reader.config_mut().trim_text(true); + let mut writer = Writer::new(Vec::new()); + loop { + match reader.read_event()? { + Eof => break, + e => assert!(writer.write_event(e).is_ok()), + } + } + + let result = writer.into_inner(); + assert_eq!(String::from_utf8(result).unwrap(), input); + Ok(()) +} + +/// Directly write reference to event from reader without processing (except auto-trimming text). +#[test] +fn with_trim_ref() -> Result<()> { + let input = include_str!("documents/test_writer.xml").trim(); + let mut reader = Reader::from_str(input); + reader.config_mut().trim_text(true); + let mut writer = Writer::new(Vec::new()); + loop { + match reader.read_event()? { + Eof => break, + e => assert!(writer.write_event(&e).is_ok()), // either `e` or `&e` + } + } + + let result = writer.into_inner(); + assert_eq!(String::from_utf8(result).unwrap(), input); + Ok(()) +} + +/// Directly write event from reader without processing (except auto-trimming text) +/// with the same indentation settings as in the original document. +#[test] +fn with_indent() -> Result<()> { + let input = include_str!("documents/test_writer_indent.xml"); + let mut reader = Reader::from_str(input); + reader.config_mut().trim_text(true); + let mut writer = Writer::new_with_indent(Vec::new(), b' ', 4); + loop { + match reader.read_event()? 
{ + Eof => break, + e => assert!(writer.write_event(e).is_ok()), + } + } + + let result = writer.into_inner(); + assert_eq!(String::from_utf8(result).unwrap(), input); + Ok(()) +} + +/// Directly write event from reader without processing (except auto-trimming text) +/// with the same indentation settings as in the original document. +/// Document contains CDATA section. +#[test] +fn with_indent_cdata() -> Result<()> { + let input = include_str!("documents/test_writer_indent_cdata.xml"); + let mut reader = Reader::from_str(input); + reader.config_mut().trim_text(true); + let mut writer = Writer::new_with_indent(Vec::new(), b' ', 4); + loop { + match reader.read_event()? { + Eof => break, + e => assert!(writer.write_event(e).is_ok()), + } + } + + let result = writer.into_inner(); + assert_eq!(String::from_utf8(result).unwrap(), input); + Ok(()) +} + +/// Directly write event from reader with unescaping and re-escaping content of the `Text` events. +#[test] +fn reescape_text() -> Result<()> { + let mut reader = Reader::from_str(XML); + let mut writer = Writer::new(Vec::new()); + loop { + match reader.read_event()? { + Eof => break, + Text(e) => { + let t = e.unescape().unwrap(); + assert!(writer.write_event(Text(BytesText::new(&t))).is_ok()); + } + e => assert!(writer.write_event(e).is_ok()), + } + } + + let result = writer.into_inner(); + assert_eq!(String::from_utf8(result).unwrap(), XML); + Ok(()) +} + +/// Rewrite some events during processing +#[test] +fn partial_rewrite() -> Result<()> { + type AttrResult = std::result::Result; + + let str_from = r#""#; + let expected = r#""#; + let mut reader = Reader::from_str(str_from); + let mut writer = Writer::new(Vec::new()); + loop { + let event = match reader.read_event()? 
{ + Eof => break, + Start(elem) => { + let mut attrs = elem.attributes().collect::>>()?; + attrs.extend_from_slice(&[("a", "b").into(), ("c", "d").into()]); + let mut elem = BytesStart::new("copy"); + elem.extend_attributes(attrs); + elem.push_attribute(("x", "y\"z")); + Start(elem) + } + End(_) => End(BytesEnd::new("copy")), + e => e, + }; + assert!(writer.write_event(event).is_ok()); + } + + let result = writer.into_inner(); + assert_eq!(String::from_utf8(result).unwrap(), expected); + Ok(()) +} diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index 77904f4e..f4a3e64e 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -1,13 +1,10 @@ use std::borrow::Cow; use std::str::from_utf8; -use quick_xml::events::attributes::{AttrError, Attribute}; +use quick_xml::events::attributes::Attribute; use quick_xml::events::Event::*; -use quick_xml::events::{BytesEnd, BytesStart, BytesText}; use quick_xml::name::QName; use quick_xml::reader::Reader; -use quick_xml::writer::Writer; -use quick_xml::Result; use pretty_assertions::assert_eq; @@ -152,126 +149,6 @@ fn test_nested() { next_eq!(r, Start, b"a", Start, b"b", Text, b"test", End, b"b", Empty, b"c", End, b"a"); } -#[test] -fn test_writer() -> Result<()> { - let txt = include_str!("../tests/documents/test_writer.xml").trim(); - let mut reader = Reader::from_str(txt); - reader.config_mut().trim_text(true); - let mut writer = Writer::new(Vec::new()); - loop { - match reader.read_event()? { - Eof => break, - e => assert!(writer.write_event(e).is_ok()), - } - } - - let result = writer.into_inner(); - assert_eq!(String::from_utf8(result).unwrap(), txt); - Ok(()) -} - -#[test] -fn test_writer_borrow() -> Result<()> { - let txt = include_str!("../tests/documents/test_writer.xml").trim(); - let mut reader = Reader::from_str(txt); - reader.config_mut().trim_text(true); - let mut writer = Writer::new(Vec::new()); - loop { - match reader.read_event()? 
{ - Eof => break, - e => assert!(writer.write_event(&e).is_ok()), // either `e` or `&e` - } - } - - let result = writer.into_inner(); - assert_eq!(String::from_utf8(result).unwrap(), txt); - Ok(()) -} - -#[test] -fn test_writer_indent() -> Result<()> { - let txt = include_str!("../tests/documents/test_writer_indent.xml"); - let mut reader = Reader::from_str(txt); - reader.config_mut().trim_text(true); - let mut writer = Writer::new_with_indent(Vec::new(), b' ', 4); - loop { - match reader.read_event()? { - Eof => break, - e => assert!(writer.write_event(e).is_ok()), - } - } - - let result = writer.into_inner(); - assert_eq!(String::from_utf8(result).unwrap(), txt); - Ok(()) -} - -#[test] -fn test_writer_indent_cdata() -> Result<()> { - let txt = include_str!("../tests/documents/test_writer_indent_cdata.xml"); - let mut reader = Reader::from_str(txt); - reader.config_mut().trim_text(true); - let mut writer = Writer::new_with_indent(Vec::new(), b' ', 4); - loop { - match reader.read_event()? { - Eof => break, - e => assert!(writer.write_event(e).is_ok()), - } - } - - let result = writer.into_inner(); - assert_eq!(String::from_utf8(result).unwrap(), txt); - Ok(()) -} - -#[test] -fn test_write_empty_element_attrs() -> Result<()> { - let str_from = r#""#; - let expected = r#""#; - let mut reader = Reader::from_str(str_from); - let mut writer = Writer::new(Vec::new()); - loop { - match reader.read_event()? { - Eof => break, - e => assert!(writer.write_event(e).is_ok()), - } - } - - let result = writer.into_inner(); - assert_eq!(String::from_utf8(result).unwrap(), expected); - Ok(()) -} - -#[test] -fn test_write_attrs() -> Result<()> { - type AttrResult = std::result::Result; - - let str_from = r#""#; - let expected = r#""#; - let mut reader = Reader::from_str(str_from); - let mut writer = Writer::new(Vec::new()); - loop { - let event = match reader.read_event()? 
{ - Eof => break, - Start(elem) => { - let mut attrs = elem.attributes().collect::>>()?; - attrs.extend_from_slice(&[("a", "b").into(), ("c", "d").into()]); - let mut elem = BytesStart::new("copy"); - elem.extend_attributes(attrs); - elem.push_attribute(("x", "y\"z")); - Start(elem) - } - End(_) => End(BytesEnd::new("copy")), - e => e, - }; - assert!(writer.write_event(event).is_ok()); - } - - let result = writer.into_inner(); - assert_eq!(String::from_utf8(result).unwrap(), expected); - Ok(()) -} - #[test] fn test_escaped_content() { let mut r = Reader::from_str("<test>"); @@ -303,60 +180,6 @@ fn test_escaped_content() { next_eq!(r, End, b"a"); } -#[test] -fn test_read_write_roundtrip() -> Result<()> { - let input = r#" - -
-
-
-
data <escaped>
-
- "#; - - let mut reader = Reader::from_str(input); - let mut writer = Writer::new(Vec::new()); - loop { - match reader.read_event()? { - Eof => break, - e => assert!(writer.write_event(e).is_ok()), - } - } - - let result = writer.into_inner(); - assert_eq!(String::from_utf8(result).unwrap(), input); - Ok(()) -} - -#[test] -fn test_read_write_roundtrip_escape_text() -> Result<()> { - let input = r#" - -
-
-
-
data <escaped>
-
- "#; - - let mut reader = Reader::from_str(input); - let mut writer = Writer::new(Vec::new()); - loop { - match reader.read_event()? { - Eof => break, - Text(e) => { - let t = e.unescape().unwrap(); - assert!(writer.write_event(Text(BytesText::new(&t))).is_ok()); - } - e => assert!(writer.write_event(e).is_ok()), - } - } - - let result = writer.into_inner(); - assert_eq!(String::from_utf8(result).unwrap(), input); - Ok(()) -} - #[test] fn test_closing_bracket_in_single_quote_attr() { let mut r = Reader::from_str(""); From a48c88f8b684a25340e2236a4d0af167258a4e62 Mon Sep 17 00:00:00 2001 From: Mingun Date: Tue, 18 Jun 2024 23:31:50 +0500 Subject: [PATCH 10/26] Use traditional tests with explicit .unwrap() instead of returning error from test function I'm afraid that if to return error then it will be not clear where it is originated --- tests/roundtrip.rs | 45 ++++++++++++++++++--------------------------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/tests/roundtrip.rs b/tests/roundtrip.rs index 1a6e5e2b..a8989250 100644 --- a/tests/roundtrip.rs +++ b/tests/roundtrip.rs @@ -4,7 +4,6 @@ use quick_xml::events::attributes::AttrError; use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event::*}; use quick_xml::reader::Reader; use quick_xml::writer::Writer; -use quick_xml::Result; use pretty_assertions::assert_eq; @@ -13,12 +12,12 @@ mod events { use pretty_assertions::assert_eq; #[test] - fn empty() -> Result<()> { - let input = r#""#; + fn empty() { + let input = r#""#; let mut reader = Reader::from_str(input); let mut writer = Writer::new(Vec::new()); loop { - match reader.read_event()? { + match reader.read_event().unwrap() { Eof => break, e => assert!(writer.write_event(e).is_ok()), } @@ -26,7 +25,6 @@ mod events { let result = writer.into_inner(); assert_eq!(String::from_utf8(result).unwrap(), input); - Ok(()) } } @@ -42,11 +40,11 @@ const XML: &str = r#" /// Directly write event from reader without any processing. 
#[test] -fn simple() -> Result<()> { +fn simple() { let mut reader = Reader::from_str(XML); let mut writer = Writer::new(Vec::new()); loop { - match reader.read_event()? { + match reader.read_event().unwrap() { Eof => break, e => assert!(writer.write_event(e).is_ok()), } @@ -54,18 +52,17 @@ fn simple() -> Result<()> { let result = writer.into_inner(); assert_eq!(String::from_utf8(result).unwrap(), XML); - Ok(()) } /// Directly write event from reader without processing (except auto-trimming text). #[test] -fn with_trim() -> Result<()> { +fn with_trim() { let input = include_str!("documents/test_writer.xml").trim(); let mut reader = Reader::from_str(input); reader.config_mut().trim_text(true); let mut writer = Writer::new(Vec::new()); loop { - match reader.read_event()? { + match reader.read_event().unwrap() { Eof => break, e => assert!(writer.write_event(e).is_ok()), } @@ -73,18 +70,17 @@ fn with_trim() -> Result<()> { let result = writer.into_inner(); assert_eq!(String::from_utf8(result).unwrap(), input); - Ok(()) } /// Directly write reference to event from reader without processing (except auto-trimming text). #[test] -fn with_trim_ref() -> Result<()> { +fn with_trim_ref() { let input = include_str!("documents/test_writer.xml").trim(); let mut reader = Reader::from_str(input); reader.config_mut().trim_text(true); let mut writer = Writer::new(Vec::new()); loop { - match reader.read_event()? { + match reader.read_event().unwrap() { Eof => break, e => assert!(writer.write_event(&e).is_ok()), // either `e` or `&e` } @@ -92,19 +88,18 @@ fn with_trim_ref() -> Result<()> { let result = writer.into_inner(); assert_eq!(String::from_utf8(result).unwrap(), input); - Ok(()) } /// Directly write event from reader without processing (except auto-trimming text) /// with the same indentation settings as in the original document. 
#[test] -fn with_indent() -> Result<()> { +fn with_indent() { let input = include_str!("documents/test_writer_indent.xml"); let mut reader = Reader::from_str(input); reader.config_mut().trim_text(true); let mut writer = Writer::new_with_indent(Vec::new(), b' ', 4); loop { - match reader.read_event()? { + match reader.read_event().unwrap() { Eof => break, e => assert!(writer.write_event(e).is_ok()), } @@ -112,20 +107,19 @@ fn with_indent() -> Result<()> { let result = writer.into_inner(); assert_eq!(String::from_utf8(result).unwrap(), input); - Ok(()) } /// Directly write event from reader without processing (except auto-trimming text) /// with the same indentation settings as in the original document. /// Document contains CDATA section. #[test] -fn with_indent_cdata() -> Result<()> { +fn with_indent_cdata() { let input = include_str!("documents/test_writer_indent_cdata.xml"); let mut reader = Reader::from_str(input); reader.config_mut().trim_text(true); let mut writer = Writer::new_with_indent(Vec::new(), b' ', 4); loop { - match reader.read_event()? { + match reader.read_event().unwrap() { Eof => break, e => assert!(writer.write_event(e).is_ok()), } @@ -133,16 +127,15 @@ fn with_indent_cdata() -> Result<()> { let result = writer.into_inner(); assert_eq!(String::from_utf8(result).unwrap(), input); - Ok(()) } /// Directly write event from reader with unescaping and re-escaping content of the `Text` events. #[test] -fn reescape_text() -> Result<()> { +fn reescape_text() { let mut reader = Reader::from_str(XML); let mut writer = Writer::new(Vec::new()); loop { - match reader.read_event()? 
{ + match reader.read_event().unwrap() { Eof => break, Text(e) => { let t = e.unescape().unwrap(); @@ -154,12 +147,11 @@ fn reescape_text() -> Result<()> { let result = writer.into_inner(); assert_eq!(String::from_utf8(result).unwrap(), XML); - Ok(()) } /// Rewrite some events during processing #[test] -fn partial_rewrite() -> Result<()> { +fn partial_rewrite() { type AttrResult = std::result::Result; let str_from = r#""#; @@ -167,10 +159,10 @@ fn partial_rewrite() -> Result<()> { let mut reader = Reader::from_str(str_from); let mut writer = Writer::new(Vec::new()); loop { - let event = match reader.read_event()? { + let event = match reader.read_event().unwrap() { Eof => break, Start(elem) => { - let mut attrs = elem.attributes().collect::>>()?; + let mut attrs = elem.attributes().collect::>>().unwrap(); attrs.extend_from_slice(&[("a", "b").into(), ("c", "d").into()]); let mut elem = BytesStart::new("copy"); elem.extend_attributes(attrs); @@ -185,5 +177,4 @@ fn partial_rewrite() -> Result<()> { let result = writer.into_inner(); assert_eq!(String::from_utf8(result).unwrap(), expected); - Ok(()) } From 92574ddfa4be626e78c77353b6deaf68001fc23c Mon Sep 17 00:00:00 2001 From: Mingun Date: Tue, 18 Jun 2024 23:26:31 +0500 Subject: [PATCH 11/26] Add tests for roundtrip of each possible event except Eof --- tests/roundtrip.rs | 98 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/tests/roundtrip.rs b/tests/roundtrip.rs index a8989250..0f0ed0f9 100644 --- a/tests/roundtrip.rs +++ b/tests/roundtrip.rs @@ -11,6 +11,24 @@ mod events { use super::*; use pretty_assertions::assert_eq; + /// Test start and end together because reading only end event requires special + /// setting on the reader + #[test] + fn start_end() { + let input = r#""#; + let mut reader = Reader::from_str(input); + let mut writer = Writer::new(Vec::new()); + loop { + match reader.read_event().unwrap() { + Eof => break, + e => assert!(writer.write_event(e).is_ok()), 
+ } + } + + let result = writer.into_inner(); + assert_eq!(String::from_utf8(result).unwrap(), input); + } + #[test] fn empty() { let input = r#""#; @@ -26,6 +44,86 @@ mod events { let result = writer.into_inner(); assert_eq!(String::from_utf8(result).unwrap(), input); } + + #[test] + fn text() { + let input = "it is just arbitrary text & some character reference"; + let mut reader = Reader::from_str(input); + let mut writer = Writer::new(Vec::new()); + loop { + match reader.read_event().unwrap() { + Eof => break, + e => assert!(writer.write_event(e).is_ok()), + } + } + + let result = writer.into_inner(); + assert_eq!(String::from_utf8(result).unwrap(), input); + } + + #[test] + fn cdata() { + let input = ""; + let mut reader = Reader::from_str(input); + let mut writer = Writer::new(Vec::new()); + loop { + match reader.read_event().unwrap() { + Eof => break, + e => assert!(writer.write_event(e).is_ok()), + } + } + + let result = writer.into_inner(); + assert_eq!(String::from_utf8(result).unwrap(), input); + } + + #[test] + fn pi() { + let input = ""; + let mut reader = Reader::from_str(input); + let mut writer = Writer::new(Vec::new()); + loop { + match reader.read_event().unwrap() { + Eof => break, + e => assert!(writer.write_event(e).is_ok()), + } + } + + let result = writer.into_inner(); + assert_eq!(String::from_utf8(result).unwrap(), input); + } + + #[test] + fn decl() { + let input = ""; + let mut reader = Reader::from_str(input); + let mut writer = Writer::new(Vec::new()); + loop { + match reader.read_event().unwrap() { + Eof => break, + e => assert!(writer.write_event(e).is_ok()), + } + } + + let result = writer.into_inner(); + assert_eq!(String::from_utf8(result).unwrap(), input); + } + + #[test] + fn comment() { + let input = ""; + let mut reader = Reader::from_str(input); + let mut writer = Writer::new(Vec::new()); + loop { + match reader.read_event().unwrap() { + Eof => break, + e => assert!(writer.write_event(e).is_ok()), + } + } + + let result = 
writer.into_inner(); + assert_eq!(String::from_utf8(result).unwrap(), input); + } } /// Indent of the last tag mismatched intentionally From c3272e61d2a41b0f5f678bc3059eaf3ea7c7e2d3 Mon Sep 17 00:00:00 2001 From: Mingun Date: Tue, 18 Jun 2024 19:39:34 +0500 Subject: [PATCH 12/26] Use direct comparison of events instead of using macro Although this changes some checks for Text, CData, and Comment events - previously they checked `Deref` implementation of corresponding events, but this can be considered, as not planned side effect. Explicit testing of this implementation was added in one of previous commits --- tests/unit_tests.rs | 121 +++++++++++++++++++++++--------------------- 1 file changed, 62 insertions(+), 59 deletions(-) diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index f4a3e64e..d711218a 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -2,94 +2,81 @@ use std::borrow::Cow; use std::str::from_utf8; use quick_xml::events::attributes::Attribute; -use quick_xml::events::Event::*; +use quick_xml::events::{BytesCData, BytesEnd, BytesStart, BytesText, Event::*}; use quick_xml::name::QName; use quick_xml::reader::Reader; use pretty_assertions::assert_eq; -macro_rules! next_eq_name { - ($r:expr, $t:tt, $bytes:expr) => { - match $r.read_event().unwrap() { - $t(ref e) if e.name().as_ref() == $bytes => (), - e => panic!( - "expecting {}({:?}), found {:?}", - stringify!($t), - from_utf8($bytes), - e - ), - } - }; -} - -macro_rules! next_eq_content { - ($r:expr, $t:tt, $bytes:expr) => { - match $r.read_event().unwrap() { - $t(ref e) if e.as_ref() == $bytes => (), - e => panic!( - "expecting {}({:?}), found {:?}", - stringify!($t), - from_utf8($bytes), - e - ), - } - }; -} - -macro_rules! 
next_eq { - ($r:expr, Start, $bytes:expr) => (next_eq_name!($r, Start, $bytes);); - ($r:expr, End, $bytes:expr) => (next_eq_name!($r, End, $bytes);); - ($r:expr, Empty, $bytes:expr) => (next_eq_name!($r, Empty, $bytes);); - ($r:expr, Comment, $bytes:expr) => (next_eq_content!($r, Comment, $bytes);); - ($r:expr, Text, $bytes:expr) => (next_eq_content!($r, Text, $bytes);); - ($r:expr, CData, $bytes:expr) => (next_eq_content!($r, CData, $bytes);); - ($r:expr, $t0:tt, $b0:expr, $($t:tt, $bytes:expr),*) => { - next_eq!($r, $t0, $b0); - next_eq!($r, $($t, $bytes),*); - }; -} - #[test] fn test_start_end() { let mut r = Reader::from_str(""); - next_eq!(r, Start, b"a", End, b"a"); + + assert_eq!(r.read_event().unwrap(), Start(BytesStart::new("a"))); + assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("a"))); } #[test] fn test_start_end_with_ws() { let mut r = Reader::from_str(""); - next_eq!(r, Start, b"a", End, b"a"); + + assert_eq!(r.read_event().unwrap(), Start(BytesStart::new("a"))); + assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("a"))); } #[test] fn test_start_end_attr() { let mut r = Reader::from_str(""); - next_eq!(r, Start, b"a", End, b"a"); + + assert_eq!( + r.read_event().unwrap(), + Start(BytesStart::from_content("a b=\"test\"", 1)) + ); + assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("a"))); } #[test] fn test_empty_attr() { let mut r = Reader::from_str(""); - next_eq!(r, Empty, b"a"); + + assert_eq!( + r.read_event().unwrap(), + Empty(BytesStart::from_content("a b=\"test\" ", 1)) + ); } #[test] fn test_start_end_comment() { let mut r = Reader::from_str(" "); r.config_mut().trim_text(true); - next_eq!(r, Start, b"b", Empty, b"a", Empty, b"a", Comment, b"t", End, b"b"); + + assert_eq!(r.read_event().unwrap(), Start(BytesStart::new("b"))); + assert_eq!( + r.read_event().unwrap(), + Empty(BytesStart::from_content("a b=\"test\" c=\"test\"", 1)) + ); + assert_eq!( + r.read_event().unwrap(), + Empty(BytesStart::from_content("a ", 1)) + ); + 
assert_eq!(r.read_event().unwrap(), Comment(BytesText::new("t"))); + assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("b"))); } #[test] fn test_start_txt_end() { let mut r = Reader::from_str("test"); - next_eq!(r, Start, b"a", Text, b"test", End, b"a"); + + assert_eq!(r.read_event().unwrap(), Start(BytesStart::new("a"))); + assert_eq!(r.read_event().unwrap(), Text(BytesText::new("test"))); + assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("a"))); } #[test] fn test_comment() { let mut r = Reader::from_str(""); - next_eq!(r, Comment, b"test"); + + assert_eq!(r.read_event().unwrap(), Comment(BytesText::new("test"))); } #[test] @@ -128,31 +115,47 @@ fn test_xml_decl() { #[test] fn test_cdata() { let mut r = Reader::from_str(""); - next_eq!(r, CData, b"test"); + + assert_eq!(r.read_event().unwrap(), CData(BytesCData::new("test"))); } #[test] fn test_cdata_open_close() { let mut r = Reader::from_str(" test]]>"); - next_eq!(r, CData, b"test <> test"); + + assert_eq!( + r.read_event().unwrap(), + CData(BytesCData::new("test <> test")) + ); } #[test] fn test_start_attr() { let mut r = Reader::from_str(""); - next_eq!(r, Start, b"a"); + + assert_eq!( + r.read_event().unwrap(), + Start(BytesStart::from_content("a b=\"c\"", 1)) + ); } #[test] fn test_nested() { let mut r = Reader::from_str("test"); - next_eq!(r, Start, b"a", Start, b"b", Text, b"test", End, b"b", Empty, b"c", End, b"a"); + + assert_eq!(r.read_event().unwrap(), Start(BytesStart::new("a"))); + assert_eq!(r.read_event().unwrap(), Start(BytesStart::new("b"))); + assert_eq!(r.read_event().unwrap(), Text(BytesText::new("test"))); + assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("b"))); + assert_eq!(r.read_event().unwrap(), Empty(BytesStart::new("c"))); + assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("a"))); } #[test] fn test_escaped_content() { let mut r = Reader::from_str("<test>"); - next_eq!(r, Start, b"a"); + + assert_eq!(r.read_event().unwrap(), Start(BytesStart::new("a"))); match 
r.read_event() { Ok(Text(e)) => { assert_eq!( @@ -177,7 +180,7 @@ fn test_escaped_content() { e ), } - next_eq!(r, End, b"a"); + assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("a"))); } #[test] @@ -204,7 +207,7 @@ fn test_closing_bracket_in_single_quote_attr() { } x => panic!("expected , got {:?}", x), } - next_eq!(r, End, b"a"); + assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("a"))); } #[test] @@ -231,7 +234,7 @@ fn test_closing_bracket_in_double_quote_attr() { } x => panic!("expected , got {:?}", x), } - next_eq!(r, End, b"a"); + assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("a"))); } #[test] @@ -258,7 +261,7 @@ fn test_closing_bracket_in_double_quote_mixed() { } x => panic!("expected , got {:?}", x), } - next_eq!(r, End, b"a"); + assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("a"))); } #[test] @@ -285,5 +288,5 @@ fn test_closing_bracket_in_single_quote_mixed() { } x => panic!("expected , got {:?}", x), } - next_eq!(r, End, b"a"); + assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("a"))); } From db582ced6cd57157764349677b9f02dc1738e67f Mon Sep 17 00:00:00 2001 From: Mingun Date: Tue, 18 Jun 2024 19:41:55 +0500 Subject: [PATCH 13/26] Move all attribute reading tests to tests/reader-attributes.rs Renamed tests: - test_closing_bracket_in_single_quote_attr -> single_gt - test_closing_bracket_in_double_quote_attr -> double_gt - test_closing_bracket_in_double_quote_mixed -> double_gt_apos - test_closing_bracket_in_single_quote_mixed -> single_gt_quot - test_attributes_empty -> empty_tag - test_attribute_equal -> equal_sign_in_value --- tests/reader-attributes.rs | 161 +++++++++++++++++++++++++++++++++++++ tests/test.rs | 52 ------------ tests/unit_tests.rs | 111 ------------------------- 3 files changed, 161 insertions(+), 163 deletions(-) create mode 100644 tests/reader-attributes.rs diff --git a/tests/reader-attributes.rs b/tests/reader-attributes.rs new file mode 100644 index 00000000..8d51b22a --- /dev/null +++ 
b/tests/reader-attributes.rs @@ -0,0 +1,161 @@ +use std::borrow::Cow; + +use quick_xml::events::attributes::Attribute; +use quick_xml::events::{BytesEnd, Event::*}; +use quick_xml::name::QName; +use quick_xml::reader::Reader; + +use pretty_assertions::assert_eq; + +#[test] +fn single_gt() { + let mut reader = Reader::from_str(""); + match reader.read_event() { + Ok(Start(e)) => { + let mut attrs = e.attributes(); + assert_eq!( + attrs.next(), + Some(Ok(Attribute { + key: QName(b"attr"), + value: Cow::Borrowed(b">"), + })) + ); + assert_eq!( + attrs.next(), + Some(Ok(Attribute { + key: QName(b"check"), + value: Cow::Borrowed(b"2"), + })) + ); + assert_eq!(attrs.next(), None); + } + x => panic!("expected , got {:?}", x), + } + assert_eq!(reader.read_event().unwrap(), End(BytesEnd::new("a"))); +} + +#[test] +fn single_gt_quot() { + let mut reader = Reader::from_str(r#""#); + match reader.read_event() { + Ok(Start(e)) => { + let mut attrs = e.attributes(); + assert_eq!( + attrs.next(), + Some(Ok(Attribute { + key: QName(b"attr"), + value: Cow::Borrowed(br#"">""#), + })) + ); + assert_eq!( + attrs.next(), + Some(Ok(Attribute { + key: QName(b"check"), + value: Cow::Borrowed(br#""2""#), + })) + ); + assert_eq!(attrs.next(), None); + } + x => panic!("expected , got {:?}", x), + } + assert_eq!(reader.read_event().unwrap(), End(BytesEnd::new("a"))); +} + +#[test] +fn double_gt() { + let mut reader = Reader::from_str(r#""#); + match reader.read_event() { + Ok(Start(e)) => { + let mut attrs = e.attributes(); + assert_eq!( + attrs.next(), + Some(Ok(Attribute { + key: QName(b"attr"), + value: Cow::Borrowed(b">"), + })) + ); + assert_eq!( + attrs.next(), + Some(Ok(Attribute { + key: QName(b"check"), + value: Cow::Borrowed(b"2"), + })) + ); + assert_eq!(attrs.next(), None); + } + x => panic!("expected , got {:?}", x), + } + assert_eq!(reader.read_event().unwrap(), End(BytesEnd::new("a"))); +} + +#[test] +fn double_gt_apos() { + let mut reader = Reader::from_str(r#""#); + match 
reader.read_event() { + Ok(Start(e)) => { + let mut attrs = e.attributes(); + assert_eq!( + attrs.next(), + Some(Ok(Attribute { + key: QName(b"attr"), + value: Cow::Borrowed(b"'>'"), + })) + ); + assert_eq!( + attrs.next(), + Some(Ok(Attribute { + key: QName(b"check"), + value: Cow::Borrowed(b"'2'"), + })) + ); + assert_eq!(attrs.next(), None); + } + x => panic!("expected , got {:?}", x), + } + assert_eq!(reader.read_event().unwrap(), End(BytesEnd::new("a"))); +} + +#[test] +fn empty_tag() { + let mut reader = Reader::from_str(""); + match reader.read_event() { + Ok(Empty(e)) => { + let mut attrs = e.attributes(); + assert_eq!( + attrs.next(), + Some(Ok(Attribute { + key: QName(b"att1"), + value: Cow::Borrowed(b"a"), + })) + ); + assert_eq!( + attrs.next(), + Some(Ok(Attribute { + key: QName(b"att2"), + value: Cow::Borrowed(b"b"), + })) + ); + assert_eq!(attrs.next(), None); + } + e => panic!("Expecting Empty event, got {:?}", e), + } +} + +#[test] +fn equal_sign_in_value() { + let mut reader = Reader::from_str(""); + match reader.read_event() { + Ok(Empty(e)) => { + let mut attrs = e.attributes(); + assert_eq!( + attrs.next(), + Some(Ok(Attribute { + key: QName(b"att1"), + value: Cow::Borrowed(b"a=b"), + })) + ); + assert_eq!(attrs.next(), None); + } + e => panic!("Expecting Empty event, got {:?}", e), + } +} diff --git a/tests/test.rs b/tests/test.rs index e72d61f8..edbd555c 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -1,10 +1,5 @@ -use quick_xml::events::attributes::Attribute; use quick_xml::events::Event::*; -use quick_xml::name::QName; use quick_xml::reader::Reader; -use std::borrow::Cow; - -use pretty_assertions::assert_eq; #[test] fn test_sample() { @@ -22,53 +17,6 @@ fn test_sample() { println!("{}", count); } -#[test] -fn test_attributes_empty() { - let src = ""; - let mut r = Reader::from_str(src); - match r.read_event() { - Ok(Empty(e)) => { - let mut attrs = e.attributes(); - assert_eq!( - attrs.next(), - Some(Ok(Attribute { - key: QName(b"att1"), 
- value: Cow::Borrowed(b"a"), - })) - ); - assert_eq!( - attrs.next(), - Some(Ok(Attribute { - key: QName(b"att2"), - value: Cow::Borrowed(b"b"), - })) - ); - assert_eq!(attrs.next(), None); - } - e => panic!("Expecting Empty event, got {:?}", e), - } -} - -#[test] -fn test_attribute_equal() { - let src = ""; - let mut r = Reader::from_str(src); - match r.read_event() { - Ok(Empty(e)) => { - let mut attrs = e.attributes(); - assert_eq!( - attrs.next(), - Some(Ok(Attribute { - key: QName(b"att1"), - value: Cow::Borrowed(b"a=b"), - })) - ); - assert_eq!(attrs.next(), None); - } - e => panic!("Expecting Empty event, got {:?}", e), - } -} - #[test] fn test_clone_reader() { let mut reader = Reader::from_str("text"); diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index d711218a..3876dd9c 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -1,9 +1,6 @@ -use std::borrow::Cow; use std::str::from_utf8; -use quick_xml::events::attributes::Attribute; use quick_xml::events::{BytesCData, BytesEnd, BytesStart, BytesText, Event::*}; -use quick_xml::name::QName; use quick_xml::reader::Reader; use pretty_assertions::assert_eq; @@ -182,111 +179,3 @@ fn test_escaped_content() { } assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("a"))); } - -#[test] -fn test_closing_bracket_in_single_quote_attr() { - let mut r = Reader::from_str(""); - match r.read_event() { - Ok(Start(e)) => { - let mut attrs = e.attributes(); - assert_eq!( - attrs.next(), - Some(Ok(Attribute { - key: QName(b"attr"), - value: Cow::Borrowed(b">"), - })) - ); - assert_eq!( - attrs.next(), - Some(Ok(Attribute { - key: QName(b"check"), - value: Cow::Borrowed(b"2"), - })) - ); - assert_eq!(attrs.next(), None); - } - x => panic!("expected , got {:?}", x), - } - assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("a"))); -} - -#[test] -fn test_closing_bracket_in_double_quote_attr() { - let mut r = Reader::from_str(r#""#); - match r.read_event() { - Ok(Start(e)) => { - let mut attrs = e.attributes(); 
- assert_eq!( - attrs.next(), - Some(Ok(Attribute { - key: QName(b"attr"), - value: Cow::Borrowed(b">"), - })) - ); - assert_eq!( - attrs.next(), - Some(Ok(Attribute { - key: QName(b"check"), - value: Cow::Borrowed(b"2"), - })) - ); - assert_eq!(attrs.next(), None); - } - x => panic!("expected , got {:?}", x), - } - assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("a"))); -} - -#[test] -fn test_closing_bracket_in_double_quote_mixed() { - let mut r = Reader::from_str(r#""#); - match r.read_event() { - Ok(Start(e)) => { - let mut attrs = e.attributes(); - assert_eq!( - attrs.next(), - Some(Ok(Attribute { - key: QName(b"attr"), - value: Cow::Borrowed(b"'>'"), - })) - ); - assert_eq!( - attrs.next(), - Some(Ok(Attribute { - key: QName(b"check"), - value: Cow::Borrowed(b"'2'"), - })) - ); - assert_eq!(attrs.next(), None); - } - x => panic!("expected , got {:?}", x), - } - assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("a"))); -} - -#[test] -fn test_closing_bracket_in_single_quote_mixed() { - let mut r = Reader::from_str(r#""#); - match r.read_event() { - Ok(Start(e)) => { - let mut attrs = e.attributes(); - assert_eq!( - attrs.next(), - Some(Ok(Attribute { - key: QName(b"attr"), - value: Cow::Borrowed(br#"">""#), - })) - ); - assert_eq!( - attrs.next(), - Some(Ok(Attribute { - key: QName(b"check"), - value: Cow::Borrowed(br#""2""#), - })) - ); - assert_eq!(attrs.next(), None); - } - x => panic!("expected , got {:?}", x), - } - assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("a"))); -} From 361a78c29e9f0a5e77ca22af305aa7106dc63b4b Mon Sep 17 00:00:00 2001 From: Mingun Date: Tue, 18 Jun 2024 19:08:16 +0500 Subject: [PATCH 14/26] Rename tests/unit_tests.rs to tests/reader.rs --- tests/{unit_tests.rs => reader.rs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{unit_tests.rs => reader.rs} (100%) diff --git a/tests/unit_tests.rs b/tests/reader.rs similarity index 100% rename from tests/unit_tests.rs rename to tests/reader.rs From 
a5331bded0dea5c68b06668941cbdea3fef8f6ac Mon Sep 17 00:00:00 2001 From: Mingun Date: Tue, 18 Jun 2024 20:22:07 +0500 Subject: [PATCH 15/26] Merge tests/test.rs into tests/reader.rs Renamed tests: - test_sample -> it_works - test_clone_reader -> clone_state --- tests/reader.rs | 32 ++++++++++++++++++++++++++++++++ tests/test.rs | 33 --------------------------------- 2 files changed, 32 insertions(+), 33 deletions(-) delete mode 100644 tests/test.rs diff --git a/tests/reader.rs b/tests/reader.rs index 3876dd9c..2152cc1b 100644 --- a/tests/reader.rs +++ b/tests/reader.rs @@ -179,3 +179,35 @@ fn test_escaped_content() { } assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("a"))); } + +#[test] +fn it_works() { + let src = include_str!("documents/sample_rss.xml"); + let mut reader = Reader::from_str(src); + let mut count = 0; + loop { + match reader.read_event().unwrap() { + Start(_) => count += 1, + Decl(e) => println!("{:?}", e.version()), + Eof => break, + _ => (), + } + } + println!("{}", count); +} + +/// Checks that after cloning reader the parse state is independent in each copy +#[test] +fn clone_state() { + let mut reader = Reader::from_str("text"); + + assert!(matches!(reader.read_event().unwrap(), Start(_))); + + let mut cloned = reader.clone(); + + assert!(matches!(reader.read_event().unwrap(), Text(_))); + assert!(matches!(reader.read_event().unwrap(), End(_))); + + assert!(matches!(cloned.read_event().unwrap(), Text(_))); + assert!(matches!(cloned.read_event().unwrap(), End(_))); +} diff --git a/tests/test.rs b/tests/test.rs deleted file mode 100644 index edbd555c..00000000 --- a/tests/test.rs +++ /dev/null @@ -1,33 +0,0 @@ -use quick_xml::events::Event::*; -use quick_xml::reader::Reader; - -#[test] -fn test_sample() { - let src = include_str!("documents/sample_rss.xml"); - let mut r = Reader::from_str(src); - let mut count = 0; - loop { - match r.read_event().unwrap() { - Start(_) => count += 1, - Decl(e) => println!("{:?}", e.version()), - Eof => 
break, - _ => (), - } - } - println!("{}", count); -} - -#[test] -fn test_clone_reader() { - let mut reader = Reader::from_str("text"); - - assert!(matches!(reader.read_event().unwrap(), Start(_))); - - let mut cloned = reader.clone(); - - assert!(matches!(reader.read_event().unwrap(), Text(_))); - assert!(matches!(reader.read_event().unwrap(), End(_))); - - assert!(matches!(cloned.read_event().unwrap(), Text(_))); - assert!(matches!(cloned.read_event().unwrap(), End(_))); -} From a7ee2946acd86c9011929bd503a102d88771e233 Mon Sep 17 00:00:00 2001 From: Mingun Date: Wed, 19 Jun 2024 00:02:58 +0500 Subject: [PATCH 16/26] Rename tests/namespaces.rs to tests/reader-namespaces.rs --- tests/{namespaces.rs => reader-namespaces.rs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{namespaces.rs => reader-namespaces.rs} (100%) diff --git a/tests/namespaces.rs b/tests/reader-namespaces.rs similarity index 100% rename from tests/namespaces.rs rename to tests/reader-namespaces.rs From 6ce094488a28529303bb93c9244c1d61d9fbb173 Mon Sep 17 00:00:00 2001 From: Mingun Date: Wed, 19 Jun 2024 00:31:30 +0500 Subject: [PATCH 17/26] Remove excess tests from xmlrs_reader_tests.rs The following tests in `xmlrs_reader_tests.rs` already checked by the following existing tests: tabs_1 file: tests/reader-config.rs test: all in `trim_text*` modules issue_83_duplicate_attributes file: src/events/attributes.rs test: all in `duplicated` modules issue_93_large_characters_in_entity_references This test actually not valid for quick-xml, because this is well-formedless test of acceptable characters in entity names, but quick-xml does not yet performs such checks issue_98_cdata_ending_with_right_bracket file: src/de/mod.rs test: many `cdata` tests in `merge_text` module issue_attributes_have_no_default_namespace file: tests/reader-namespaces.rs test: default_ns_shadowing_empty default_namespace_applies_to_end_elem file: tests/reader-namespaces.rs test: default_namespace --- 
tests/xmlrs_reader_tests.rs | 89 ------------------------------------- 1 file changed, 89 deletions(-) diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index d438f240..6c2c91f3 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -178,65 +178,6 @@ fn sample_ns_short() { ); } -#[test] -fn tabs_1() { - test( - "\t\t", - r#" - StartElement(a) - EmptyElement(b) - EndElement(a) - EndDocument - "#, - true, - ); -} - -#[test] -fn issue_83_duplicate_attributes() { - // Error when parsing attributes won't stop main event reader - // as it is a lazy operation => add ending events - test( - r#""#, - " - |StartElement(hello) - |1:30 EmptyElement(some-tag, attr-error: \ - position 16: duplicated attribute, previous declaration at position 9) - |EndElement(hello) - |EndDocument - ", - true, - ); -} - -#[test] -fn issue_93_large_characters_in_entity_references() { - test( - r#"&𤶼;"#, - r#" - |StartElement(hello) - |1:10 FailedUnescape([38, 240, 164, 182, 188, 59]; Error while escaping character at range 1..5: Unrecognized escape symbol: "𤶼") - |EndElement(hello) - |EndDocument - "#, - true, - ) -} - -#[test] -fn issue_98_cdata_ending_with_right_bracket() { - test( - r#""#, - r#" - |StartElement(hello) - |CData(Foo [Bar]) - |EndElement(hello) - |EndDocument - "#, - false, - ) -} - #[test] fn issue_105_unexpected_double_dash() { test( @@ -284,20 +225,6 @@ fn issue_105_unexpected_double_dash() { ); } -#[test] -fn issue_attributes_have_no_default_namespace() { - // At the moment, the 'test' method doesn't render namespaces for attribute names. - // This test only checks whether the default namespace got applied to the EmptyElement. 
- test( - r#""#, - r#" - |EmptyElement({urn:foo}hello [x="y"]) - |EndDocument - "#, - true, - ); -} - #[test] fn issue_default_namespace_on_outermost_element() { // Regression test @@ -311,22 +238,6 @@ fn issue_default_namespace_on_outermost_element() { ); } -#[test] -fn default_namespace_applies_to_end_elem() { - test( - r#" - - "#, - r#" - |StartElement({urn:foo}hello [x="y"]) - |EmptyElement({urn:foo}inner) - |EndElement({urn:foo}hello) - |EndDocument - "#, - true, - ); -} - #[track_caller] fn test(input: &str, output: &str, trim: bool) { test_bytes(input.as_bytes(), output.as_bytes(), trim); From fa5d05295f76045b3668fec838072fea23d5c4f8 Mon Sep 17 00:00:00 2001 From: Mingun Date: Wed, 19 Jun 2024 01:48:52 +0500 Subject: [PATCH 18/26] Replace test issue_default_namespace_on_outermost_element with more formal test in reader-namespaces.rs (Review in whitespace changes ignored mode) --- tests/reader-namespaces.rs | 238 +++++++++++++++++++++--------------- tests/xmlrs_reader_tests.rs | 13 -- 2 files changed, 139 insertions(+), 112 deletions(-) diff --git a/tests/reader-namespaces.rs b/tests/reader-namespaces.rs index dc3d0185..4c4e5581 100644 --- a/tests/reader-namespaces.rs +++ b/tests/reader-namespaces.rs @@ -87,117 +87,157 @@ fn namespace() { ); } -#[test] -fn default_namespace() { - let mut r = NsReader::from_str(r#""#); +mod default_namespace { + use super::*; + use pretty_assertions::assert_eq; - // - match r.read_resolved_event() { - Ok((ns, Start(_))) => assert_eq!(ns, Unbound), - e => panic!( - "expecting outer start element with no namespace, got {:?}", - e - ), - } - let it = r.prefixes(); - assert_eq!(it.size_hint(), (0, Some(0))); - assert_eq!(it.collect::>(), vec![]); + #[test] + fn event_empty() { + let mut r = NsReader::from_str(""); - // - match r.read_resolved_event() { - Ok((ns, Start(_))) => assert_eq!(ns, Bound(Namespace(b"www1"))), - e => panic!( - "expecting inner start element with to resolve to 'www1', got {:?}", - e - ), - } - let it = 
r.prefixes(); - assert_eq!(it.size_hint(), (0, Some(1))); - assert_eq!( - it.collect::>(), - vec![(PrefixDeclaration::Default, Namespace(b"www1"))] - ); + let e = match r.read_resolved_event() { + Ok((ns, Empty(e))) => { + assert_eq!(ns, Bound(Namespace(b"ns"))); + e + } + e => panic!("Expecting Empty event, got {:?}", e), + }; - // - match r.read_resolved_event() { - Ok((ns, End(_))) => assert_eq!(ns, Bound(Namespace(b"www1"))), - e => panic!( - "expecting inner end element with to resolve to 'www1', got {:?}", - e - ), - } - let it = r.prefixes(); - assert_eq!(it.size_hint(), (0, Some(1))); - assert_eq!( - it.collect::>(), - vec![(PrefixDeclaration::Default, Namespace(b"www1"))] - ); + let mut attrs = e + .attributes() + .map(|ar| ar.expect("Expecting attribute parsing to succeed.")) + // we don't care about xmlns attributes for this test + .filter(|kv| kv.key.as_namespace_binding().is_none()) + .map(|Attribute { key: name, value }| { + let (opt_ns, local_name) = r.resolve_attribute(name); + (opt_ns, local_name.into_inner(), value) + }); + assert_eq!( + attrs.next(), + Some((Unbound, &b"attr"[..], Cow::Borrowed(&b"val"[..]))) + ); + assert_eq!(attrs.next(), None); - // very important: a should not be in any namespace. The default namespace only applies to - // the sub-document it is defined on. 
- match r.read_resolved_event() { - Ok((ns, End(_))) => assert_eq!(ns, Unbound), - e => panic!("expecting outer end element with no namespace, got {:?}", e), + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(1))); + assert_eq!( + it.collect::>(), + vec![(PrefixDeclaration::Default, Namespace(b"ns"))] + ); } - let it = r.prefixes(); - assert_eq!(it.size_hint(), (0, Some(0))); - assert_eq!(it.collect::>(), vec![]); -} -#[test] -fn default_namespace_reset() { - let mut r = NsReader::from_str(r#""#); + #[test] + fn event_start_end() { + let mut r = NsReader::from_str(r#""#); - // - match r.read_resolved_event() { - Ok((ns, Start(_))) => assert_eq!(ns, Bound(Namespace(b"www1"))), - e => panic!( - "expecting outer start element with to resolve to 'www1', got {:?}", - e - ), - } - let it = r.prefixes(); - assert_eq!(it.size_hint(), (0, Some(1))); - assert_eq!( - it.collect::>(), - vec![(PrefixDeclaration::Default, Namespace(b"www1"))] - ); + // + match r.read_resolved_event() { + Ok((ns, Start(_))) => assert_eq!(ns, Unbound), + e => panic!( + "expecting outer start element with no namespace, got {:?}", + e + ), + } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(0))); + assert_eq!(it.collect::>(), vec![]); - // - match r.read_resolved_event() { - Ok((ns, Start(_))) => assert_eq!(ns, Unbound), - e => panic!( - "expecting inner start element with no namespace, got {:?}", - e - ), - } - let it = r.prefixes(); - assert_eq!(it.size_hint(), (0, Some(2))); - assert_eq!(it.collect::>(), vec![]); + // + match r.read_resolved_event() { + Ok((ns, Start(_))) => assert_eq!(ns, Bound(Namespace(b"www1"))), + e => panic!( + "expecting inner start element with to resolve to 'www1', got {:?}", + e + ), + } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(1))); + assert_eq!( + it.collect::>(), + vec![(PrefixDeclaration::Default, Namespace(b"www1"))] + ); - // - match r.read_resolved_event() { - Ok((ns, End(_))) => assert_eq!(ns, Unbound), - e => 
panic!("expecting inner end element with no namespace, got {:?}", e), + // + match r.read_resolved_event() { + Ok((ns, End(_))) => assert_eq!(ns, Bound(Namespace(b"www1"))), + e => panic!( + "expecting inner end element with to resolve to 'www1', got {:?}", + e + ), + } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(1))); + assert_eq!( + it.collect::>(), + vec![(PrefixDeclaration::Default, Namespace(b"www1"))] + ); + + // very important: a should not be in any namespace. The default namespace only applies to + // the sub-document it is defined on. + match r.read_resolved_event() { + Ok((ns, End(_))) => assert_eq!(ns, Unbound), + e => panic!("expecting outer end element with no namespace, got {:?}", e), + } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(0))); + assert_eq!(it.collect::>(), vec![]); } - let it = r.prefixes(); - assert_eq!(it.size_hint(), (0, Some(2))); - assert_eq!(it.collect::>(), vec![]); - // - match r.read_resolved_event() { - Ok((ns, End(_))) => assert_eq!(ns, Bound(Namespace(b"www1"))), - e => panic!( - "expecting outer end element with to resolve to 'www1', got {:?}", - e - ), + #[test] + fn reset() { + let mut r = NsReader::from_str(r#""#); + + // + match r.read_resolved_event() { + Ok((ns, Start(_))) => assert_eq!(ns, Bound(Namespace(b"www1"))), + e => panic!( + "expecting outer start element with to resolve to 'www1', got {:?}", + e + ), + } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(1))); + assert_eq!( + it.collect::>(), + vec![(PrefixDeclaration::Default, Namespace(b"www1"))] + ); + + // + match r.read_resolved_event() { + Ok((ns, Start(_))) => assert_eq!(ns, Unbound), + e => panic!( + "expecting inner start element with no namespace, got {:?}", + e + ), + } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(2))); + assert_eq!(it.collect::>(), vec![]); + + // + match r.read_resolved_event() { + Ok((ns, End(_))) => assert_eq!(ns, Unbound), + e => panic!("expecting inner end 
element with no namespace, got {:?}", e), + } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(2))); + assert_eq!(it.collect::>(), vec![]); + + // + match r.read_resolved_event() { + Ok((ns, End(_))) => assert_eq!(ns, Bound(Namespace(b"www1"))), + e => panic!( + "expecting outer end element with to resolve to 'www1', got {:?}", + e + ), + } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(1))); + assert_eq!( + it.collect::>(), + vec![(PrefixDeclaration::Default, Namespace(b"www1"))] + ); } - let it = r.prefixes(); - assert_eq!(it.size_hint(), (0, Some(1))); - assert_eq!( - it.collect::>(), - vec![(PrefixDeclaration::Default, Namespace(b"www1"))] - ); } /// Single empty element with qualified attributes. diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index 6c2c91f3..f172378e 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -225,19 +225,6 @@ fn issue_105_unexpected_double_dash() { ); } -#[test] -fn issue_default_namespace_on_outermost_element() { - // Regression test - test( - r#""#, - r#" - |EmptyElement({urn:foo}hello) - |EndDocument - "#, - true, - ); -} - #[track_caller] fn test(input: &str, output: &str, trim: bool) { test_bytes(input.as_bytes(), output.as_bytes(), trim); From f567a74f505937152aea7e6b9336d8397020abce Mon Sep 17 00:00:00 2001 From: Mingun Date: Wed, 19 Jun 2024 01:51:07 +0500 Subject: [PATCH 19/26] Move default_ns_* tests into the module default_namespace (Review in whitespace changes ignored mode) --- tests/reader-namespaces.rs | 280 ++++++++++++++++++------------------- 1 file changed, 140 insertions(+), 140 deletions(-) diff --git a/tests/reader-namespaces.rs b/tests/reader-namespaces.rs index 4c4e5581..764b585a 100644 --- a/tests/reader-namespaces.rs +++ b/tests/reader-namespaces.rs @@ -238,6 +238,146 @@ mod default_namespace { vec![(PrefixDeclaration::Default, Namespace(b"www1"))] ); } + + #[test] + fn shadowing_empty() { + let src = ""; + + let mut r = 
NsReader::from_str(src); + + // + { + match r.read_resolved_event() { + Ok((ns, Start(e))) => { + assert_eq!(ns, Bound(Namespace(b"urn:example:o"))); + assert_eq!(e.name(), QName(b"e")); + } + e => panic!("Expected Start event (), got {:?}", e), + } + + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(1))); + assert_eq!( + it.collect::>(), + vec![(PrefixDeclaration::Default, Namespace(b"urn:example:o"))] + ); + } + + // + { + let e = match r.read_resolved_event() { + Ok((ns, Empty(e))) => { + assert_eq!(ns, Bound(Namespace(b"urn:example:i"))); + assert_eq!(e.name(), QName(b"e")); + e + } + e => panic!("Expecting Empty event, got {:?}", e), + }; + + let mut attrs = e + .attributes() + .map(|ar| ar.expect("Expecting attribute parsing to succeed.")) + // we don't care about xmlns attributes for this test + .filter(|kv| kv.key.as_namespace_binding().is_none()) + .map(|Attribute { key: name, value }| { + let (opt_ns, local_name) = r.resolve_attribute(name); + (opt_ns, local_name.into_inner(), value) + }); + // the attribute should _not_ have a namespace name. The default namespace does not + // apply to attributes. 
+ assert_eq!( + attrs.next(), + Some((Unbound, &b"att1"[..], Cow::Borrowed(&b"a"[..]))) + ); + assert_eq!(attrs.next(), None); + + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(2))); + assert_eq!( + it.collect::>(), + vec![(PrefixDeclaration::Default, Namespace(b"urn:example:i")),] + ); + } + + // + match r.read_resolved_event() { + Ok((ns, End(e))) => { + assert_eq!(ns, Bound(Namespace(b"urn:example:o"))); + assert_eq!(e.name(), QName(b"e")); + } + e => panic!("Expected End event (), got {:?}", e), + } + let it = r.prefixes(); + assert_eq!(it.size_hint(), (0, Some(1))); + assert_eq!( + it.collect::>(), + vec![(PrefixDeclaration::Default, Namespace(b"urn:example:o"))] + ); + } + + #[test] + fn shadowing_expanded() { + let src = ""; + + let mut r = NsReader::from_str(src); + r.config_mut().expand_empty_elements = true; + + // + { + match r.read_resolved_event() { + Ok((ns, Start(e))) => { + assert_eq!(ns, Bound(Namespace(b"urn:example:o"))); + assert_eq!(e.name(), QName(b"e")); + } + e => panic!("Expected Start event (), got {:?}", e), + } + } + + // + { + let e = match r.read_resolved_event() { + Ok((ns, Start(e))) => { + assert_eq!(ns, Bound(Namespace(b"urn:example:i"))); + assert_eq!(e.name(), QName(b"e")); + e + } + e => panic!("Expecting Start event (), got {:?}", e), + }; + let mut attrs = e + .attributes() + .map(|ar| ar.expect("Expecting attribute parsing to succeed.")) + // we don't care about xmlns attributes for this test + .filter(|kv| kv.key.as_namespace_binding().is_none()) + .map(|Attribute { key: name, value }| { + let (opt_ns, local_name) = r.resolve_attribute(name); + (opt_ns, local_name.into_inner(), value) + }); + // the attribute should _not_ have a namespace name. The default namespace does not + // apply to attributes. 
+ assert_eq!( + attrs.next(), + Some((Unbound, &b"att1"[..], Cow::Borrowed(&b"a"[..]))) + ); + assert_eq!(attrs.next(), None); + } + + // virtual + match r.read_resolved_event() { + Ok((ns, End(e))) => { + assert_eq!(ns, Bound(Namespace(b"urn:example:i"))); + assert_eq!(e.name(), QName(b"e")); + } + e => panic!("Expected End event (), got {:?}", e), + } + // + match r.read_resolved_event() { + Ok((ns, End(e))) => { + assert_eq!(ns, Bound(Namespace(b"urn:example:o"))); + assert_eq!(e.name(), QName(b"e")); + } + e => panic!("Expected End event (), got {:?}", e), + } + } } /// Single empty element with qualified attributes. @@ -337,146 +477,6 @@ fn attributes_empty_ns_expanded() { } } -#[test] -fn default_ns_shadowing_empty() { - let src = ""; - - let mut r = NsReader::from_str(src); - - // - { - match r.read_resolved_event() { - Ok((ns, Start(e))) => { - assert_eq!(ns, Bound(Namespace(b"urn:example:o"))); - assert_eq!(e.name(), QName(b"e")); - } - e => panic!("Expected Start event (), got {:?}", e), - } - - let it = r.prefixes(); - assert_eq!(it.size_hint(), (0, Some(1))); - assert_eq!( - it.collect::>(), - vec![(PrefixDeclaration::Default, Namespace(b"urn:example:o"))] - ); - } - - // - { - let e = match r.read_resolved_event() { - Ok((ns, Empty(e))) => { - assert_eq!(ns, Bound(Namespace(b"urn:example:i"))); - assert_eq!(e.name(), QName(b"e")); - e - } - e => panic!("Expecting Empty event, got {:?}", e), - }; - - let mut attrs = e - .attributes() - .map(|ar| ar.expect("Expecting attribute parsing to succeed.")) - // we don't care about xmlns attributes for this test - .filter(|kv| kv.key.as_namespace_binding().is_none()) - .map(|Attribute { key: name, value }| { - let (opt_ns, local_name) = r.resolve_attribute(name); - (opt_ns, local_name.into_inner(), value) - }); - // the attribute should _not_ have a namespace name. The default namespace does not - // apply to attributes. 
- assert_eq!( - attrs.next(), - Some((Unbound, &b"att1"[..], Cow::Borrowed(&b"a"[..]))) - ); - assert_eq!(attrs.next(), None); - - let it = r.prefixes(); - assert_eq!(it.size_hint(), (0, Some(2))); - assert_eq!( - it.collect::>(), - vec![(PrefixDeclaration::Default, Namespace(b"urn:example:i")),] - ); - } - - // - match r.read_resolved_event() { - Ok((ns, End(e))) => { - assert_eq!(ns, Bound(Namespace(b"urn:example:o"))); - assert_eq!(e.name(), QName(b"e")); - } - e => panic!("Expected End event (), got {:?}", e), - } - let it = r.prefixes(); - assert_eq!(it.size_hint(), (0, Some(1))); - assert_eq!( - it.collect::>(), - vec![(PrefixDeclaration::Default, Namespace(b"urn:example:o"))] - ); -} - -#[test] -fn default_ns_shadowing_expanded() { - let src = ""; - - let mut r = NsReader::from_str(src); - r.config_mut().expand_empty_elements = true; - - // - { - match r.read_resolved_event() { - Ok((ns, Start(e))) => { - assert_eq!(ns, Bound(Namespace(b"urn:example:o"))); - assert_eq!(e.name(), QName(b"e")); - } - e => panic!("Expected Start event (), got {:?}", e), - } - } - - // - { - let e = match r.read_resolved_event() { - Ok((ns, Start(e))) => { - assert_eq!(ns, Bound(Namespace(b"urn:example:i"))); - assert_eq!(e.name(), QName(b"e")); - e - } - e => panic!("Expecting Start event (), got {:?}", e), - }; - let mut attrs = e - .attributes() - .map(|ar| ar.expect("Expecting attribute parsing to succeed.")) - // we don't care about xmlns attributes for this test - .filter(|kv| kv.key.as_namespace_binding().is_none()) - .map(|Attribute { key: name, value }| { - let (opt_ns, local_name) = r.resolve_attribute(name); - (opt_ns, local_name.into_inner(), value) - }); - // the attribute should _not_ have a namespace name. The default namespace does not - // apply to attributes. 
- assert_eq!( - attrs.next(), - Some((Unbound, &b"att1"[..], Cow::Borrowed(&b"a"[..]))) - ); - assert_eq!(attrs.next(), None); - } - - // virtual - match r.read_resolved_event() { - Ok((ns, End(e))) => { - assert_eq!(ns, Bound(Namespace(b"urn:example:i"))); - assert_eq!(e.name(), QName(b"e")); - } - e => panic!("Expected End event (), got {:?}", e), - } - // - match r.read_resolved_event() { - Ok((ns, End(e))) => { - assert_eq!(ns, Bound(Namespace(b"urn:example:o"))); - assert_eq!(e.name(), QName(b"e")); - } - e => panic!("Expected End event (), got {:?}", e), - } -} - /// Although the XML specification [recommends against] the use of names where /// the local name portion begins with the letters "xml" (case insensitive), /// it also specifies, that processors *MUST NOT* treat them as fatal errors. From eff9a86851785a1206727f7d775b15fe1c9627fd Mon Sep 17 00:00:00 2001 From: Mingun Date: Thu, 20 Jun 2024 01:43:47 +0500 Subject: [PATCH 20/26] Replace test issue_105_unexpected_double_dash with more formal test in reader.rs --- tests/reader.rs | 45 +++++++++++++++++++++++++++++++++++ tests/xmlrs_reader_tests.rs | 47 ------------------------------------- 2 files changed, 45 insertions(+), 47 deletions(-) diff --git a/tests/reader.rs b/tests/reader.rs index 2152cc1b..ae864953 100644 --- a/tests/reader.rs +++ b/tests/reader.rs @@ -211,3 +211,48 @@ fn clone_state() { assert!(matches!(cloned.read_event().unwrap(), Text(_))); assert!(matches!(cloned.read_event().unwrap(), End(_))); } + +/// Ported tests from xml-rs crate from function `issue_105_unexpected_double_dash` +mod double_dash { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn text1() { + let mut r = Reader::from_str("-- "); + + assert_eq!(r.read_event().unwrap(), Start(BytesStart::new("hello"))); + assert_eq!(r.read_event().unwrap(), Text(BytesText::new("-- "))); + assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("hello"))); + } + + #[test] + fn text2() { + let mut r = 
Reader::from_str("--"); + + assert_eq!(r.read_event().unwrap(), Start(BytesStart::new("hello"))); + assert_eq!(r.read_event().unwrap(), Text(BytesText::new("--"))); + assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("hello"))); + } + + #[test] + fn text3() { + let mut r = Reader::from_str("-->"); + + assert_eq!(r.read_event().unwrap(), Start(BytesStart::new("hello"))); + assert_eq!( + r.read_event().unwrap(), + Text(BytesText::from_escaped("-->")) + ); + assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("hello"))); + } + + #[test] + fn cdata() { + let mut r = Reader::from_str(""); + + assert_eq!(r.read_event().unwrap(), Start(BytesStart::new("hello"))); + assert_eq!(r.read_event().unwrap(), CData(BytesCData::new("--"))); + assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("hello"))); + } +} diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index f172378e..20b1315a 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -178,53 +178,6 @@ fn sample_ns_short() { ); } -#[test] -fn issue_105_unexpected_double_dash() { - test( - r#"-- "#, - r#" - |StartElement(hello) - |Characters(-- ) - |EndElement(hello) - |EndDocument - "#, - false, - ); - - test( - r#"--"#, - r#" - |StartElement(hello) - |Characters(--) - |EndElement(hello) - |EndDocument - "#, - false, - ); - - test( - r#"-->"#, - r#" - |StartElement(hello) - |Characters(-->) - |EndElement(hello) - |EndDocument - "#, - false, - ); - - test( - r#""#, - r#" - |StartElement(hello) - |CData(--) - |EndElement(hello) - |EndDocument - "#, - false, - ); -} - #[track_caller] fn test(input: &str, output: &str, trim: bool) { test_bytes(input.as_bytes(), output.as_bytes(), trim); From e1e3489c48a602b968590ea0c5f09260d2c11b30 Mon Sep 17 00:00:00 2001 From: Mingun Date: Thu, 20 Jun 2024 02:40:23 +0500 Subject: [PATCH 21/26] More strict encoding tests - check that we get expected events As a result, `encoded_characters` test in xmlrs_reader_tests.rs can be removed, because we 
have a test that checks all characters of Shift_JIS encoding --- tests/encodings.rs | 160 +++++++++++++++++++++++++++--------- tests/xmlrs_reader_tests.rs | 20 ----- 2 files changed, 121 insertions(+), 59 deletions(-) diff --git a/tests/encodings.rs b/tests/encodings.rs index de039ed4..9502adb7 100644 --- a/tests/encodings.rs +++ b/tests/encodings.rs @@ -1,4 +1,4 @@ -use quick_xml::events::Event; +use quick_xml::events::Event::*; use quick_xml::Reader; mod decode { @@ -34,10 +34,10 @@ fn test_koi8_r_encoding() { r.config_mut().trim_text(true); loop { match r.read_event_into(&mut buf) { - Ok(Event::Text(e)) => { + Ok(Text(e)) => { e.unescape().unwrap(); } - Ok(Event::Eof) => break, + Ok(Eof) => break, _ => (), } } @@ -50,6 +50,88 @@ mod detect { use encoding_rs::*; use pretty_assertions::assert_eq; + macro_rules! assert_matches { + ($number:literal : $left:expr, $pattern:pat_param) => {{ + let event = $left; + if !matches!(event, $pattern) { + assert_eq!( + format!("{:#?}", event), + stringify!($pattern), + concat!("Message ", stringify!($number), " is incorrect") + ); + } + }}; + } + macro_rules! check_detection { + ($test:ident, $enc:ident, $file:literal) => { + #[test] + fn $test() { + let mut r = Reader::from_reader( + include_bytes!(concat!("documents/encoding/", $file, ".xml")).as_ref(), + ); + assert_eq!(r.decoder().encoding(), UTF_8); + + let mut buf = Vec::new(); + // XML declaration with encoding + assert_matches!(1: r.read_event_into(&mut buf).unwrap(), Decl(_)); + assert_eq!(r.decoder().encoding(), $enc); + assert_matches!(2: r.read_event_into(&mut buf).unwrap(), Text(_)); // spaces + buf.clear(); + + // Comment with information that this is generated file + assert_matches!(3: r.read_event_into(&mut buf).unwrap(), Comment(_)); + assert_eq!(r.decoder().encoding(), $enc); + assert_matches!(4: r.read_event_into(&mut buf).unwrap(), Text(_)); // spaces + buf.clear(); + + // Open root element tag. Contains 3 attributes: + // - attribute1 - double-quoted. 
Value - all possible characters in that encoding + // - attribute2 - single-quoted. Value - all possible characters in that encoding + // - unquoted. Name and value - all possible characters in that encoding + assert_matches!(5: r.read_event_into(&mut buf).unwrap(), Start(_)); + assert_eq!(r.decoder().encoding(), $enc); + assert_matches!(6: r.read_event_into(&mut buf).unwrap(), Text(_)); // spaces + buf.clear(); + + // Processing instruction with all possible characters in that encoding + assert_matches!(7: r.read_event_into(&mut buf).unwrap(), PI(_)); + assert_eq!(r.decoder().encoding(), $enc); + assert_matches!(8: r.read_event_into(&mut buf).unwrap(), Text(_)); // spaces + buf.clear(); + + // Comment with all possible characters in that encoding + assert_matches!(9: r.read_event_into(&mut buf).unwrap(), Comment(_)); + assert_eq!(r.decoder().encoding(), $enc); + buf.clear(); + + // Text with all possible characters in that encoding except some + assert_matches!(10: r.read_event_into(&mut buf).unwrap(), Text(_)); + assert_eq!(r.decoder().encoding(), $enc); + buf.clear(); + + // Empty tag with name from all possible characters in that encoding except some + assert_matches!(11: r.read_event_into(&mut buf).unwrap(), Empty(_)); + assert_eq!(r.decoder().encoding(), $enc); + assert_matches!(12: r.read_event_into(&mut buf).unwrap(), Text(_)); // spaces + buf.clear(); + + // CDATA section with all possible characters in that encoding + assert_matches!(13: r.read_event_into(&mut buf).unwrap(), CData(_)); + assert_eq!(r.decoder().encoding(), $enc); + assert_matches!(14: r.read_event_into(&mut buf).unwrap(), Text(_)); // spaces + buf.clear(); + + // Close root element tag + assert_matches!(15: r.read_event_into(&mut buf).unwrap(), End(_)); + assert_eq!(r.decoder().encoding(), $enc); + buf.clear(); + + // Document should end + assert_matches!(16: r.read_event_into(&mut buf).unwrap(), Eof); + assert_eq!(r.decoder().encoding(), $enc); + } + }; + } macro_rules! 
detect_test { ($test:ident, $enc:ident, $file:literal $($break:stmt)?) => { #[test] @@ -62,7 +144,7 @@ mod detect { let mut buf = Vec::new(); loop { match dbg!(r.read_event_into(&mut buf).unwrap()) { - Event::Eof => break, + Eof => break, _ => {} } assert_eq!(r.decoder().encoding(), $enc); @@ -84,44 +166,44 @@ mod detect { detect_test!(utf16le_bom, UTF_16LE, "utf16le-bom"); // legacy multi-byte encodings (7) - detect_test!(big5, BIG5, "Big5"); - detect_test!(euc_jp, EUC_JP, "EUC-JP"); - detect_test!(euc_kr, EUC_KR, "EUC-KR"); - detect_test!(gb18030, GB18030, "gb18030"); - detect_test!(gbk, GBK, "GBK"); + check_detection!(big5, BIG5, "Big5"); + check_detection!(euc_jp, EUC_JP, "EUC-JP"); + check_detection!(euc_kr, EUC_KR, "EUC-KR"); + check_detection!(gb18030, GB18030, "gb18030"); + check_detection!(gbk, GBK, "GBK"); // TODO: XML in this encoding cannot be parsed successfully until #158 resolves // We only read the first event to ensure, that encoding detected correctly detect_test!(iso_2022_jp, ISO_2022_JP, "ISO-2022-JP" break); - detect_test!(shift_jis, SHIFT_JIS, "Shift_JIS"); + check_detection!(shift_jis, SHIFT_JIS, "Shift_JIS"); // legacy single-byte encodings (19) - detect_test!(ibm866, IBM866, "IBM866"); - detect_test!(iso_8859_2, ISO_8859_2, "ISO-8859-2"); - detect_test!(iso_8859_3, ISO_8859_3, "ISO-8859-3"); - detect_test!(iso_8859_4, ISO_8859_4, "ISO-8859-4"); - detect_test!(iso_8859_5, ISO_8859_5, "ISO-8859-5"); - detect_test!(iso_8859_6, ISO_8859_6, "ISO-8859-6"); - detect_test!(iso_8859_7, ISO_8859_7, "ISO-8859-7"); - detect_test!(iso_8859_8, ISO_8859_8, "ISO-8859-8"); - detect_test!(iso_8859_8_i, ISO_8859_8_I, "ISO-8859-8-I"); - detect_test!(iso_8859_10, ISO_8859_10, "ISO-8859-10"); - detect_test!(iso_8859_13, ISO_8859_13, "ISO-8859-13"); - detect_test!(iso_8859_14, ISO_8859_14, "ISO-8859-14"); - detect_test!(iso_8859_15, ISO_8859_15, "ISO-8859-15"); - detect_test!(iso_8859_16, ISO_8859_16, "ISO-8859-16"); - detect_test!(koi8_r, KOI8_R, "KOI8-R"); - 
detect_test!(koi8_u, KOI8_U, "KOI8-U"); - detect_test!(macintosh, MACINTOSH, "macintosh"); - detect_test!(windows_874, WINDOWS_874, "windows-874"); - detect_test!(windows_1250, WINDOWS_1250, "windows-1250"); - detect_test!(windows_1251, WINDOWS_1251, "windows-1251"); - detect_test!(windows_1252, WINDOWS_1252, "windows-1252"); - detect_test!(windows_1253, WINDOWS_1253, "windows-1253"); - detect_test!(windows_1254, WINDOWS_1254, "windows-1254"); - detect_test!(windows_1255, WINDOWS_1255, "windows-1255"); - detect_test!(windows_1256, WINDOWS_1256, "windows-1256"); - detect_test!(windows_1257, WINDOWS_1257, "windows-1257"); - detect_test!(windows_1258, WINDOWS_1258, "windows-1258"); - detect_test!(x_mac_cyrillic, X_MAC_CYRILLIC, "x-mac-cyrillic"); - detect_test!(x_user_defined, X_USER_DEFINED, "x-user-defined"); + check_detection!(ibm866, IBM866, "IBM866"); + check_detection!(iso_8859_2, ISO_8859_2, "ISO-8859-2"); + check_detection!(iso_8859_3, ISO_8859_3, "ISO-8859-3"); + check_detection!(iso_8859_4, ISO_8859_4, "ISO-8859-4"); + check_detection!(iso_8859_5, ISO_8859_5, "ISO-8859-5"); + check_detection!(iso_8859_6, ISO_8859_6, "ISO-8859-6"); + check_detection!(iso_8859_7, ISO_8859_7, "ISO-8859-7"); + check_detection!(iso_8859_8, ISO_8859_8, "ISO-8859-8"); + check_detection!(iso_8859_8_i, ISO_8859_8_I, "ISO-8859-8-I"); + check_detection!(iso_8859_10, ISO_8859_10, "ISO-8859-10"); + check_detection!(iso_8859_13, ISO_8859_13, "ISO-8859-13"); + check_detection!(iso_8859_14, ISO_8859_14, "ISO-8859-14"); + check_detection!(iso_8859_15, ISO_8859_15, "ISO-8859-15"); + check_detection!(iso_8859_16, ISO_8859_16, "ISO-8859-16"); + check_detection!(koi8_r, KOI8_R, "KOI8-R"); + check_detection!(koi8_u, KOI8_U, "KOI8-U"); + check_detection!(macintosh, MACINTOSH, "macintosh"); + check_detection!(windows_874, WINDOWS_874, "windows-874"); + check_detection!(windows_1250, WINDOWS_1250, "windows-1250"); + check_detection!(windows_1251, WINDOWS_1251, "windows-1251"); + 
check_detection!(windows_1252, WINDOWS_1252, "windows-1252"); + check_detection!(windows_1253, WINDOWS_1253, "windows-1253"); + check_detection!(windows_1254, WINDOWS_1254, "windows-1254"); + check_detection!(windows_1255, WINDOWS_1255, "windows-1255"); + check_detection!(windows_1256, WINDOWS_1256, "windows-1256"); + check_detection!(windows_1257, WINDOWS_1257, "windows-1257"); + check_detection!(windows_1258, WINDOWS_1258, "windows-1258"); + check_detection!(x_mac_cyrillic, X_MAC_CYRILLIC, "x-mac-cyrillic"); + check_detection!(x_user_defined, X_USER_DEFINED, "x-user-defined"); } diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index 20b1315a..ba33d64d 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -99,26 +99,6 @@ fn escaped_characters_html() { ) } -#[cfg(feature = "encoding")] -#[test] -fn encoded_characters() { - test_bytes( - b"\ - \n\ - \x82\xA0\x82\xA2\x82\xA4\ - ", - " - |StartDocument(1.0, Shift_JIS) - |StartElement(a) - |Characters(あいう) - |EndElement(a) - |EndDocument - " - .as_bytes(), - true, - ) -} - // #[test] // fn sample_3_short() { // test( From 625c74eb4389839721b478a478c438336e761612 Mon Sep 17 00:00:00 2001 From: Mingun Date: Thu, 20 Jun 2024 21:00:29 +0500 Subject: [PATCH 22/26] Replace test bom_removed_from_initial_text with more formal test in tests/encodings.rs Also add more checks to test for buffered reader related to BOM --- src/reader/buffered_reader.rs | 15 ++++++++++++--- tests/encodings.rs | 20 +++++++++++++++++++- tests/xmlrs_reader_tests.rs | 18 ------------------ 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index e395e4ae..1658d925 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -482,7 +482,10 @@ mod test { let mut buf = Vec::new(); assert_eq!(reader.decoder().encoding(), UTF_8); - reader.read_event_into(&mut buf).unwrap(); + assert!(matches!( + 
reader.read_event_into(&mut buf).unwrap(), + Event::Decl(_) + )); assert_eq!(reader.decoder().encoding(), WINDOWS_1251); assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); @@ -497,10 +500,16 @@ mod test { let mut buf = Vec::new(); assert_eq!(reader.decoder().encoding(), UTF_8); - reader.read_event_into(&mut buf).unwrap(); + assert!(matches!( + reader.read_event_into(&mut buf).unwrap(), + Event::Decl(_) + )); assert_eq!(reader.decoder().encoding(), UTF_16LE); - reader.read_event_into(&mut buf).unwrap(); + assert!(matches!( + reader.read_event_into(&mut buf).unwrap(), + Event::Decl(_) + )); assert_eq!(reader.decoder().encoding(), UTF_16LE); assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); diff --git a/tests/encodings.rs b/tests/encodings.rs index 9502adb7..92ea1715 100644 --- a/tests/encodings.rs +++ b/tests/encodings.rs @@ -1,4 +1,4 @@ -use quick_xml::events::Event::*; +use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event::*}; use quick_xml::Reader; mod decode { @@ -207,3 +207,21 @@ mod detect { check_detection!(x_mac_cyrillic, X_MAC_CYRILLIC, "x-mac-cyrillic"); check_detection!(x_user_defined, X_USER_DEFINED, "x-user-defined"); } + +#[test] +fn bom_removed_from_initial_text() { + let mut r = + Reader::from_str("\u{FEFF}asdftext"); + + assert_eq!(r.read_event().unwrap(), Text(BytesText::new("asdf"))); + assert_eq!( + r.read_event().unwrap(), + Start(BytesStart::from_content( + "paired attr1=\"value1\" attr2=\"value2\"", + 6 + )) + ); + assert_eq!(r.read_event().unwrap(), Text(BytesText::new("text"))); + assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("paired"))); + assert_eq!(r.read_event().unwrap(), Eof); +} diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index ba33d64d..e52034da 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -52,24 +52,6 @@ fn html5() { ); } -#[test] -fn bom_removed_from_initial_text() { - let expected = r#" - |Characters(asdf) - 
|StartElement(paired [attr1="value1", attr2="value2"]) - |Characters(text) - |EndElement(paired) - |EndDocument - "#; - - // BOM right up against the text - test( - "\u{FEFF}asdftext", - expected, - true, - ); -} - #[test] fn escaped_characters() { test( From e9cab5969dcb074beef8a0c631789fdf7bffae88 Mon Sep 17 00:00:00 2001 From: Mingun Date: Thu, 20 Jun 2024 21:33:28 +0500 Subject: [PATCH 23/26] Replace test `escaped_characters` with `escape::test_escape` The test in xmlrs_reader_tests.rs actually calls the `unescape` function to get the resulting strings that are matched in the test, so we just test that function instead --- src/escape.rs | 5 ++++- tests/xmlrs_reader_tests.rs | 14 -------------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/src/escape.rs b/src/escape.rs index ac6c9add..769dd6af 100644 --- a/src/escape.rs +++ b/src/escape.rs @@ -1850,7 +1850,10 @@ fn test_unescape() { assert_eq!(unchanged, Cow::Borrowed("test")); assert!(matches!(unchanged, Cow::Borrowed(_))); - assert_eq!(unescape("<test>").unwrap(), ""); + assert_eq!( + unescape("<&test'">").unwrap(), + "<&test'\">" + ); assert_eq!(unescape("0").unwrap(), "0"); assert_eq!(unescape("0").unwrap(), "0"); assert!(unescape("&foo;").is_err()); diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index e52034da..898d928c 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -52,20 +52,6 @@ fn html5() { ); } -#[test] -fn escaped_characters() { - test( - r#"'a' < '&'"#, - r#" - |StartElement(e [attr=""Hello""]) - |Characters('a' < '&') - |EndElement(e) - |EndDocument - "#, - true, - ) -} - #[cfg(feature = "escape-html")] #[test] fn escaped_characters_html() { From 33219442f697a389135a1f20504ae17ee3e5e7c1 Mon Sep 17 00:00:00 2001 From: Mingun Date: Thu, 20 Jun 2024 21:50:08 +0500 Subject: [PATCH 24/26] Remove not working and commented tests sample_3 and sample_4 tests were commented out because they were ported from the xml-rs crate and use some reader settings
which quick-xml does not have. There is no sense in keeping them. sample_5 was ignored because parsing of UTF-16 encoded documents is not yet implemented. When such support is added it will have its own test, so there is no need to keep this one. --- tests/README.md | 11 +----- tests/documents/sample_3.xml | 13 ------- tests/documents/sample_3_full.txt | 33 ----------------- tests/documents/sample_3_short.txt | 14 -------- tests/documents/sample_4.xml | 15 -------- tests/documents/sample_4_full.txt | 24 ------------- tests/documents/sample_4_short.txt | 16 --------- tests/documents/sample_5_full.txt | 23 ------------ tests/documents/sample_5_short.txt | 15 -------- tests/documents/sample_5_utf16bom.xml | Bin 662 -> 0 bytes tests/xmlrs_reader_tests.rs | 50 -------------------------- 11 files changed, 1 insertion(+), 213 deletions(-) delete mode 100644 tests/documents/sample_3.xml delete mode 100644 tests/documents/sample_3_full.txt delete mode 100644 tests/documents/sample_3_short.txt delete mode 100644 tests/documents/sample_4.xml delete mode 100644 tests/documents/sample_4_full.txt delete mode 100644 tests/documents/sample_4_short.txt delete mode 100644 tests/documents/sample_5_full.txt delete mode 100644 tests/documents/sample_5_short.txt delete mode 100644 tests/documents/sample_5_utf16bom.xml diff --git a/tests/README.md b/tests/README.md index 316ee2d5..ac94f6f4 100644 --- a/tests/README.md +++ b/tests/README.md @@ -1,4 +1,4 @@ -# Document descrptions +# Document descriptions document.xml medium length, mostly empty tags, a few short attributes per element, no escaping @@ -25,16 +25,7 @@ sample_1_short.txt sample_1_full.txt sample_2.xml sample_2_short.txt -sample_3.xml sample_2_full.txt -sample_3_short.txt -sample_3_full.txt -sample_4.xml -sample_4_short.txt -sample_4_full.txt -sample_5_short.txt -sample_5_utf16bom.xml -sample_5_full.txt sample_ns_short.txt sample_ns.xml short, lots of namespaces, no escapes diff --git a/tests/documents/sample_3.xml b/tests/documents/sample_3.xml
deleted file mode 100644 index 657e37d1..00000000 --- a/tests/documents/sample_3.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - test - kkss" = ddd' > - ddddd!e3--> - test - kkss" = ddd' > - ddddd!e3-->