From 786bee16a91e4d1b50974608029b277bb5e493d4 Mon Sep 17 00:00:00 2001 From: Thomas Tanon Date: Sun, 20 Oct 2024 15:54:59 +0200 Subject: [PATCH] Make `escape` and its variants take an `impl Into<Cow<'a, str>>` argument and implement `From<(&'a str, Cow<'a, str>)>` on `Attribute` --- Changelog.md | 5 ++++- src/escape.rs | 14 +++++++++----- src/events/attributes.rs | 25 +++++++++++++++++++++++++ src/events/mod.rs | 15 ++++++--------- 4 files changed, 44 insertions(+), 15 deletions(-) diff --git a/Changelog.md b/Changelog.md index b9a3ed4f..a205615d 100644 --- a/Changelog.md +++ b/Changelog.md @@ -32,6 +32,8 @@ - [#820]: Classify output of the `Serializer` by returning an enumeration with kind of written data - [#823]: Do not allow serialization of consequent primitives, for example `Vec` or `Vec` in `$value` fields. They cannot be deserialized back with the same result +- [#827]: Make `escape` and its variants take an `impl Into<Cow<'a, str>>` argument and implement + `From<(&'a str, Cow<'a, str>)>` on `Attribute` [#227]: https://github.com/tafia/quick-xml/issues/227 [#655]: https://github.com/tafia/quick-xml/issues/655 @@ -39,6 +41,7 @@ [#811]: https://github.com/tafia/quick-xml/pull/811 [#820]: https://github.com/tafia/quick-xml/pull/820 [#823]: https://github.com/tafia/quick-xml/pull/823 +[#827]: https://github.com/tafia/quick-xml/pull/827 ## 0.36.2 -- 2024-09-20 @@ -979,7 +982,7 @@ serde >= 1.0.181 ## 0.16.0 - feat: (breaking change) set failure and encoding_rs crates as optional. -You should now use respectively `use-failure` and `encoding` features to get the old behavior + You should now use respectively `use-failure` and `encoding` features to get the old behavior - perf: improve perf using memchr3 iterator. 
Reading is 18% better on benches ## 0.15.0 diff --git a/src/escape.rs b/src/escape.rs index bd5cfbe3..7175ed88 100644 --- a/src/escape.rs +++ b/src/escape.rs @@ -101,7 +101,7 @@ impl std::error::Error for EscapeError { /// | `&` | `&amp;` /// | `'` | `&apos;` /// | `"` | `&quot;` -pub fn escape(raw: &str) -> Cow<str> { +pub fn escape<'a>(raw: impl Into<Cow<'a, str>>) -> Cow<'a, str> { _escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&' | b'\'' | b'\"')) } @@ -126,7 +126,7 @@ pub fn escape(raw: &str) -> Cow<str> { /// | `<` | `&lt;` /// | `>` | `&gt;` /// | `&` | `&amp;` -pub fn partial_escape(raw: &str) -> Cow<str> { +pub fn partial_escape<'a>(raw: impl Into<Cow<'a, str>>) -> Cow<'a, str> { _escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&')) } @@ -143,13 +143,17 @@ pub fn partial_escape(raw: &str) -> Cow<str> { /// | `&` | `&amp;` /// /// [requires]: https://www.w3.org/TR/xml11/#syntax -pub fn minimal_escape(raw: &str) -> Cow<str> { +pub fn minimal_escape<'a>(raw: impl Into<Cow<'a, str>>) -> Cow<'a, str> { _escape(raw, |ch| matches!(ch, b'<' | b'&')) } /// Escapes an `&str` and replaces a subset of xml special characters (`<`, `>`, /// `&`, `'`, `"`) with their corresponding xml escaped value. -pub(crate) fn _escape<F: Fn(u8) -> bool>(raw: &str, escape_chars: F) -> Cow<str> { +pub(crate) fn _escape<'a, F: Fn(u8) -> bool>( + raw: impl Into<Cow<'a, str>>, + escape_chars: F, +) -> Cow<'a, str> { + let raw = raw.into(); let bytes = raw.as_bytes(); let mut escaped = None; let mut iter = bytes.iter(); @@ -192,7 +196,7 @@ pub(crate) fn _escape<F: Fn(u8) -> bool>(raw: &str, escape_chars: F) -> Cow<str> // if unsafe code will be allowed Cow::Owned(String::from_utf8(escaped).unwrap()) } else { - Cow::Borrowed(raw) + raw } } diff --git a/src/events/attributes.rs b/src/events/attributes.rs index 4a2a34ab..dd35c4d5 100644 --- a/src/events/attributes.rs +++ b/src/events/attributes.rs @@ -153,6 +153,31 @@ impl<'a> From<(&'a str, &'a str)> for Attribute<'a> { } } +impl<'a> From<(&'a str, Cow<'a, str>)> for Attribute<'a> { + /// Creates new attribute from text representation. 
+ /// Key is stored as-is, but the value will be escaped. + /// + /// # Examples + /// + /// ``` + /// # use std::borrow::Cow; + /// use pretty_assertions::assert_eq; + /// use quick_xml::events::attributes::Attribute; + /// + /// let features = Attribute::from(("features", Cow::Borrowed("Bells & whistles"))); + /// assert_eq!(features.value, "Bells &amp; whistles".as_bytes()); + /// ``` + fn from(val: (&'a str, Cow<'a, str>)) -> Attribute<'a> { + Attribute { + key: QName(val.0.as_bytes()), + value: match escape(val.1) { + Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()), + Cow::Owned(s) => Cow::Owned(s.into_bytes()), + }, + } + } +} + impl<'a> From<Attr<&'a [u8]>> for Attribute<'a> { #[inline] fn from(attr: Attr<&'a [u8]>) -> Self { diff --git a/src/events/mod.rs b/src/events/mod.rs index 15e3b0b1..704c4ef6 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -746,9 +746,8 @@ impl<'a> BytesCData<'a> { pub fn escape(self) -> Result<BytesText<'a>, EncodingError> { let decoded = self.decode()?; Ok(BytesText::wrap( - match escape(&decoded) { - // Because result is borrowed, no replacements was done and we can use original content - Cow::Borrowed(_) => self.content, + match escape(decoded) { + Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()), Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()), }, Decoder::utf8(), @@ -771,9 +770,8 @@ impl<'a> BytesCData<'a> { pub fn partial_escape(self) -> Result<BytesText<'a>, EncodingError> { let decoded = self.decode()?; Ok(BytesText::wrap( - match partial_escape(&decoded) { - // Because result is borrowed, no replacements was done and we can use original content - Cow::Borrowed(_) => self.content, + match partial_escape(decoded) { + Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()), Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()), }, Decoder::utf8(), @@ -795,9 +793,8 @@ impl<'a> BytesCData<'a> { pub fn minimal_escape(self) -> Result<BytesText<'a>, EncodingError> { let decoded = self.decode()?; Ok(BytesText::wrap( - match minimal_escape(&decoded) { - // 
Because result is borrowed, no replacements was done and we can use original content - Cow::Borrowed(_) => self.content, + match minimal_escape(decoded) { + Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()), Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()), }, Decoder::utf8(),