Add utf8_decode_str()/trim_end_nulls_str() to avoid/defer allocations

Serial-ATA · Oct 26, 2023 · 9985a55 · 9985a55
1 parent 7398602
commit 9985a55
Show file tree

Hide file tree

Showing 10 changed files with 35 additions and 21 deletions.
diff --git a/src/id3/mod.rs b/src/id3/mod.rs
@@ -8,6 +8,7 @@ pub mod v2;
 
 use crate::error::{ErrorKind, LoftyError, Result};
 use crate::macros::try_vec;
+use crate::util::text::utf8_decode_str;
 use v2::header::Id3v2Header;
 
 use std::io::{Read, Seek, SeekFrom};
@@ -30,7 +31,7 @@ where
 	if &lyrics3v2[7..] == b"LYRICS200" {
 		header = Some(());
 
-		let lyrics_size = std::str::from_utf8(&lyrics3v2[..7])?;
+		let lyrics_size = utf8_decode_str(&lyrics3v2[..7])?;
 		let lyrics_size = lyrics_size.parse::<u32>().map_err(|_| {
 			LoftyError::new(ErrorKind::TextDecode(
 				"Lyrics3v2 tag has an invalid size string",

diff --git a/src/id3/v2/frame/header.rs b/src/id3/v2/frame/header.rs
@@ -3,6 +3,7 @@ use crate::error::{Id3v2Error, Id3v2ErrorKind, Result};
 use crate::id3::v2::util::synchsafe::SynchsafeInteger;
 use crate::id3::v2::util::upgrade::{upgrade_v2, upgrade_v3};
 use crate::id3::v2::FrameId;
+use crate::util::text::utf8_decode_str;
 
 use std::borrow::Cow;
 use std::io::Read;
@@ -74,7 +75,7 @@ where
 	}
 
 	let id_bytes = &header[..id_end];
-	let id_str = std::str::from_utf8(id_bytes)
+	let id_str = utf8_decode_str(id_bytes)
 		.map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadFrameId(id_bytes.to_vec())))?;
 
 	// Now upgrade the FrameId

diff --git a/src/id3/v2/items/ownership_frame.rs b/src/id3/v2/items/ownership_frame.rs
@@ -1,5 +1,5 @@
 use crate::error::{ErrorKind, Id3v2Error, Id3v2ErrorKind, LoftyError, Result};
-use crate::util::text::{decode_text, encode_text, utf8_decode, TextEncoding};
+use crate::util::text::{decode_text, encode_text, utf8_decode_str, TextEncoding};
 
 use std::hash::Hash;
 use std::io::Read;
@@ -47,10 +47,10 @@ impl OwnershipFrame {
 			.ok_or_else(|| LoftyError::new(ErrorKind::TextDecode("Found invalid encoding")))?;
 		let price_paid = decode_text(reader, TextEncoding::Latin1, true)?.content;
 
-		let mut date_bytes = vec![0u8; 8];
+		let mut date_bytes = [0u8; 8];
 		reader.read_exact(&mut date_bytes)?;
 
-		let date_of_purchase = utf8_decode(date_bytes)?;
+		let date_of_purchase = utf8_decode_str(&date_bytes)?.to_owned();
 
 		let seller = decode_text(reader, encoding, false)?.content;
 

diff --git a/src/iff/chunk.rs b/src/iff/chunk.rs
@@ -49,9 +49,7 @@ impl<B: ByteOrder> Chunks<B> {
 		let cont = self.content(data)?;
 		self.correct_position(data)?;
 
-		let value_str = std::str::from_utf8(&cont)?;
-
-		Ok(value_str.trim_end_matches('\0').to_string())
+		utf8_decode(cont)
 	}
 
 	pub fn read_pstring<R>(&mut self, data: &mut R, size: Option<u32>) -> Result<String>

diff --git a/src/iff/wav/tag/read.rs b/src/iff/wav/tag/read.rs
@@ -2,6 +2,7 @@ use super::RIFFInfoList;
 use crate::error::Result;
 use crate::iff::chunk::Chunks;
 use crate::macros::decode_err;
+use crate::util::text::utf8_decode_str;
 
 use std::io::{Read, Seek};
 
@@ -17,15 +18,15 @@ where
 	R: Read + Seek,
 {
 	while data.stream_position()? != end && chunks.next(data).is_ok() {
-		let key_str = String::from_utf8(chunks.fourcc.to_vec())
+		let key_str = utf8_decode_str(&chunks.fourcc)
 			.map_err(|_| decode_err!(Wav, "Non UTF-8 item key found in RIFF INFO"))?;
 
-		if !verify_key(&key_str) {
+		if !verify_key(key_str) {
 			decode_err!(@BAIL Wav, "RIFF INFO item key contains invalid characters");
 		}
 
 		tag.items.push((
-			key_str,
+			key_str.to_owned(),
 			chunks
 				.read_cstring(data)
 				.map_err(|_| decode_err!(Wav, "Failed to read RIFF INFO item value"))?,

diff --git a/src/mp4/atom_info.rs b/src/mp4/atom_info.rs
@@ -3,6 +3,7 @@ use crate::macros::{err, try_vec};
 use crate::probe::ParsingMode;
 use crate::tag::item::ItemKey;
 use crate::tag::TagType;
+use crate::util::text::utf8_decode;
 
 use std::borrow::Cow;
 use std::io::{Read, Seek, SeekFrom};
@@ -247,7 +248,7 @@ where
 			let mut content = try_vec![0; (len - 12) as usize];
 			data.read_exact(&mut content)?;
 
-			String::from_utf8(content).map_err(|_| {
+			utf8_decode(content).map_err(|_| {
 				LoftyError::new(ErrorKind::BadAtom(
 					"Found a non UTF-8 string while reading freeform identifier",
 				))

diff --git a/src/mp4/read.rs b/src/mp4/read.rs
@@ -6,6 +6,7 @@ use crate::error::{ErrorKind, LoftyError, Result};
 use crate::macros::{decode_err, err};
 use crate::probe::{ParseOptions, ParsingMode};
 use crate::traits::SeekStreamLen;
+use crate::util::text::utf8_decode_str;
 
 use std::io::{Read, Seek, SeekFrom};
 
@@ -163,12 +164,13 @@ where
 		decode_err!(@BAIL Mp4, "\"ftyp\" atom too short");
 	}
 
-	let mut major_brand = vec![0; 4];
+	let mut major_brand = [0u8; 4];
 	reader.read_exact(&mut major_brand)?;
 
 	reader.seek(SeekFrom::Current((atom.len - 12) as i64))?;
 
-	String::from_utf8(major_brand)
+	utf8_decode_str(&major_brand)
+		.map(ToOwned::to_owned)
 		.map_err(|_| LoftyError::new(ErrorKind::BadAtom("Unable to parse \"ftyp\"'s major brand")))
 }
 

diff --git a/src/ogg/read.rs b/src/ogg/read.rs
@@ -4,7 +4,7 @@ use crate::error::{ErrorKind, LoftyError, Result};
 use crate::macros::{decode_err, err, parse_mode_choice};
 use crate::picture::{MimeType, Picture, PictureInformation, PictureType};
 use crate::probe::ParsingMode;
-use crate::util::text::{utf16_decode, utf8_decode};
+use crate::util::text::{utf16_decode, utf8_decode, utf8_decode_str};
 
 use std::borrow::Cow;
 use std::io::{Read, Seek, SeekFrom};
@@ -149,8 +149,8 @@ where
 				// SAFETY: We just verified that all of the bytes fall within the subset of ASCII
 				let key = unsafe { String::from_utf8_unchecked(k.to_vec()) };
 
-				match utf8_decode(value.to_vec()) {
-					Ok(value) => tag.items.push((key, value)),
+				match utf8_decode_str(value) {
+					Ok(value) => tag.items.push((key, value.to_owned())),
 					Err(e) => {
 						if parse_mode == ParsingMode::Strict {
 							return Err(e);

diff --git a/src/picture.rs b/src/picture.rs
@@ -1,7 +1,7 @@
 use crate::error::{ErrorKind, LoftyError, Result};
 use crate::macros::err;
 use crate::probe::ParsingMode;
-use crate::util::text::utf8_decode;
+use crate::util::text::utf8_decode_str;
 
 use std::borrow::Cow;
 use std::fmt::{Debug, Formatter};
@@ -672,7 +672,7 @@ impl Picture {
 			err!(SizeMismatch);
 		}
 
-		let mime_type_str = std::str::from_utf8(&content[8..8 + mime_len])?;
+		let mime_type_str = utf8_decode_str(&content[8..8 + mime_len])?;
 		size -= mime_len;
 
 		reader.seek(SeekFrom::Current(mime_len as i64))?;
@@ -684,8 +684,8 @@ impl Picture {
 		if desc_len > 0 && desc_len < size {
 			let pos = 12 + mime_len;
 
-			if let Ok(desc) = utf8_decode(content[pos..pos + desc_len].to_vec()) {
-				description = Some(desc.into());
+			if let Ok(desc) = utf8_decode_str(&content[pos..pos + desc_len]) {
+				description = Some(desc.to_owned().into());
 			}
 
 			size -= desc_len;

diff --git a/src/util/text.rs b/src/util/text.rs
@@ -185,6 +185,12 @@ pub(crate) fn utf8_decode(bytes: Vec<u8>) -> Result<String> {
 		.map_err(Into::into)
 }
 
+pub(crate) fn utf8_decode_str(bytes: &[u8]) -> Result<&str> {
+	std::str::from_utf8(bytes)
+		.map(trim_end_nulls_str)
+		.map_err(Into::into)
+}
+
 pub(crate) fn utf16_decode(words: &[u16]) -> Result<String> {
 	String::from_utf16(words)
 		.map(|mut text| {
@@ -242,6 +248,10 @@ pub(crate) fn trim_end_nulls(text: &mut String) {
 	}
 }
 
+pub(crate) fn trim_end_nulls_str(text: &str) -> &str {
+	text.trim_end_matches('\0')
+}
+
 fn utf16_encode(
 	text: &str,
 	endianness: fn(u16) -> [u8; 2],