diff --git a/src/id3/mod.rs b/src/id3/mod.rs index 0e326053b..34619e70c 100644 --- a/src/id3/mod.rs +++ b/src/id3/mod.rs @@ -8,6 +8,7 @@ pub mod v2; use crate::error::{ErrorKind, LoftyError, Result}; use crate::macros::try_vec; +use crate::util::text::utf8_decode_str; use v2::header::Id3v2Header; use std::io::{Read, Seek, SeekFrom}; @@ -30,7 +31,7 @@ where if &lyrics3v2[7..] == b"LYRICS200" { header = Some(()); - let lyrics_size = std::str::from_utf8(&lyrics3v2[..7])?; + let lyrics_size = utf8_decode_str(&lyrics3v2[..7])?; let lyrics_size = lyrics_size.parse::().map_err(|_| { LoftyError::new(ErrorKind::TextDecode( "Lyrics3v2 tag has an invalid size string", diff --git a/src/id3/v2/frame/header.rs b/src/id3/v2/frame/header.rs index 81b33e5e9..969ecab65 100644 --- a/src/id3/v2/frame/header.rs +++ b/src/id3/v2/frame/header.rs @@ -3,6 +3,7 @@ use crate::error::{Id3v2Error, Id3v2ErrorKind, Result}; use crate::id3::v2::util::synchsafe::SynchsafeInteger; use crate::id3::v2::util::upgrade::{upgrade_v2, upgrade_v3}; use crate::id3::v2::FrameId; +use crate::util::text::utf8_decode_str; use std::borrow::Cow; use std::io::Read; @@ -74,7 +75,7 @@ where } let id_bytes = &header[..id_end]; - let id_str = std::str::from_utf8(id_bytes) + let id_str = utf8_decode_str(id_bytes) .map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadFrameId(id_bytes.to_vec())))?; // Now upgrade the FrameId diff --git a/src/id3/v2/items/ownership_frame.rs b/src/id3/v2/items/ownership_frame.rs index 79a878113..3b3c8e515 100644 --- a/src/id3/v2/items/ownership_frame.rs +++ b/src/id3/v2/items/ownership_frame.rs @@ -1,5 +1,5 @@ use crate::error::{ErrorKind, Id3v2Error, Id3v2ErrorKind, LoftyError, Result}; -use crate::util::text::{decode_text, encode_text, utf8_decode, TextEncoding}; +use crate::util::text::{decode_text, encode_text, utf8_decode_str, TextEncoding}; use std::hash::Hash; use std::io::Read; @@ -47,10 +47,10 @@ impl OwnershipFrame { .ok_or_else(|| LoftyError::new(ErrorKind::TextDecode("Found invalid 
encoding")))?; let price_paid = decode_text(reader, TextEncoding::Latin1, true)?.content; - let mut date_bytes = vec![0u8; 8]; + let mut date_bytes = [0u8; 8]; reader.read_exact(&mut date_bytes)?; - let date_of_purchase = utf8_decode(date_bytes)?; + let date_of_purchase = utf8_decode_str(&date_bytes)?.to_owned(); let seller = decode_text(reader, encoding, false)?.content; diff --git a/src/iff/chunk.rs b/src/iff/chunk.rs index 8036fbc70..01719284b 100644 --- a/src/iff/chunk.rs +++ b/src/iff/chunk.rs @@ -49,9 +49,7 @@ impl Chunks { let cont = self.content(data)?; self.correct_position(data)?; - let value_str = std::str::from_utf8(&cont)?; - - Ok(value_str.trim_end_matches('\0').to_string()) + utf8_decode(cont) } pub fn read_pstring(&mut self, data: &mut R, size: Option) -> Result diff --git a/src/iff/wav/tag/read.rs b/src/iff/wav/tag/read.rs index c189f7681..eceb109ad 100644 --- a/src/iff/wav/tag/read.rs +++ b/src/iff/wav/tag/read.rs @@ -2,6 +2,7 @@ use super::RIFFInfoList; use crate::error::Result; use crate::iff::chunk::Chunks; use crate::macros::decode_err; +use crate::util::text::utf8_decode_str; use std::io::{Read, Seek}; @@ -17,15 +18,15 @@ where R: Read + Seek, { while data.stream_position()? 
!= end && chunks.next(data).is_ok() { - let key_str = String::from_utf8(chunks.fourcc.to_vec()) + let key_str = utf8_decode_str(&chunks.fourcc) .map_err(|_| decode_err!(Wav, "Non UTF-8 item key found in RIFF INFO"))?; - if !verify_key(&key_str) { + if !verify_key(key_str) { decode_err!(@BAIL Wav, "RIFF INFO item key contains invalid characters"); } tag.items.push(( - key_str, + key_str.to_owned(), chunks .read_cstring(data) .map_err(|_| decode_err!(Wav, "Failed to read RIFF INFO item value"))?, diff --git a/src/mp4/atom_info.rs b/src/mp4/atom_info.rs index c4cd102c5..b10f1ee96 100644 --- a/src/mp4/atom_info.rs +++ b/src/mp4/atom_info.rs @@ -3,6 +3,7 @@ use crate::macros::{err, try_vec}; use crate::probe::ParsingMode; use crate::tag::item::ItemKey; use crate::tag::TagType; +use crate::util::text::utf8_decode; use std::borrow::Cow; use std::io::{Read, Seek, SeekFrom}; @@ -247,7 +248,7 @@ where let mut content = try_vec![0; (len - 12) as usize]; data.read_exact(&mut content)?; - String::from_utf8(content).map_err(|_| { + utf8_decode(content).map_err(|_| { LoftyError::new(ErrorKind::BadAtom( "Found a non UTF-8 string while reading freeform identifier", )) diff --git a/src/mp4/read.rs b/src/mp4/read.rs index 977428eb4..c5651f365 100644 --- a/src/mp4/read.rs +++ b/src/mp4/read.rs @@ -6,6 +6,7 @@ use crate::error::{ErrorKind, LoftyError, Result}; use crate::macros::{decode_err, err}; use crate::probe::{ParseOptions, ParsingMode}; use crate::traits::SeekStreamLen; +use crate::util::text::utf8_decode_str; use std::io::{Read, Seek, SeekFrom}; @@ -163,12 +164,13 @@ where decode_err!(@BAIL Mp4, "\"ftyp\" atom too short"); } - let mut major_brand = vec![0; 4]; + let mut major_brand = [0u8; 4]; reader.read_exact(&mut major_brand)?; reader.seek(SeekFrom::Current((atom.len - 12) as i64))?; - String::from_utf8(major_brand) + utf8_decode_str(&major_brand) + .map(ToOwned::to_owned) .map_err(|_| LoftyError::new(ErrorKind::BadAtom("Unable to parse \"ftyp\"'s major brand"))) } diff 
--git a/src/ogg/read.rs b/src/ogg/read.rs index ae2b78aa2..188c6964d 100644 --- a/src/ogg/read.rs +++ b/src/ogg/read.rs @@ -4,7 +4,7 @@ use crate::error::{ErrorKind, LoftyError, Result}; use crate::macros::{decode_err, err, parse_mode_choice}; use crate::picture::{MimeType, Picture, PictureInformation, PictureType}; use crate::probe::ParsingMode; -use crate::util::text::{utf16_decode, utf8_decode}; +use crate::util::text::{utf16_decode, utf8_decode, utf8_decode_str}; use std::borrow::Cow; use std::io::{Read, Seek, SeekFrom}; @@ -149,8 +149,8 @@ where // SAFETY: We just verified that all of the bytes fall within the subset of ASCII let key = unsafe { String::from_utf8_unchecked(k.to_vec()) }; - match utf8_decode(value.to_vec()) { - Ok(value) => tag.items.push((key, value)), + match utf8_decode_str(value) { + Ok(value) => tag.items.push((key, value.to_owned())), Err(e) => { if parse_mode == ParsingMode::Strict { return Err(e); diff --git a/src/picture.rs b/src/picture.rs index 735947471..f2866a964 100644 --- a/src/picture.rs +++ b/src/picture.rs @@ -1,7 +1,7 @@ use crate::error::{ErrorKind, LoftyError, Result}; use crate::macros::err; use crate::probe::ParsingMode; -use crate::util::text::utf8_decode; +use crate::util::text::utf8_decode_str; use std::borrow::Cow; use std::fmt::{Debug, Formatter}; @@ -672,7 +672,7 @@ impl Picture { err!(SizeMismatch); } - let mime_type_str = std::str::from_utf8(&content[8..8 + mime_len])?; + let mime_type_str = utf8_decode_str(&content[8..8 + mime_len])?; size -= mime_len; reader.seek(SeekFrom::Current(mime_len as i64))?; @@ -684,8 +684,8 @@ impl Picture { if desc_len > 0 && desc_len < size { let pos = 12 + mime_len; - if let Ok(desc) = utf8_decode(content[pos..pos + desc_len].to_vec()) { - description = Some(desc.into()); + if let Ok(desc) = utf8_decode_str(&content[pos..pos + desc_len]) { + description = Some(desc.to_owned().into()); } size -= desc_len; diff --git a/src/util/text.rs b/src/util/text.rs index d46edc7f1..063e5d483 
100644 --- a/src/util/text.rs +++ b/src/util/text.rs @@ -185,6 +185,12 @@ pub(crate) fn utf8_decode(bytes: Vec<u8>) -> Result<String> { .map_err(Into::into) } +pub(crate) fn utf8_decode_str(bytes: &[u8]) -> Result<&str> { + std::str::from_utf8(bytes) + .map(trim_end_nulls_str) + .map_err(Into::into) +} + pub(crate) fn utf16_decode(words: &[u16]) -> Result<String> { String::from_utf16(words) .map(|mut text| { @@ -242,6 +248,10 @@ pub(crate) fn trim_end_nulls(text: &mut String) { } } +pub(crate) fn trim_end_nulls_str(text: &str) -> &str { + text.trim_end_matches('\0') +} + fn utf16_encode( text: &str, endianness: fn(u16) -> [u8; 2],