Skip to content

Commit

Permalink
Add utf8_decode_str()/trim_end_nulls_str() to avoid/defer allocations
Browse files (browse the repository at this point in the history)
  • Loading branch information
uklotzde authored and Serial-ATA committed Oct 26, 2023
1 parent 7398602 commit 9985a55
Show file tree
Hide file tree
Showing 10 changed files with 35 additions and 21 deletions.
3 changes: 2 additions & 1 deletion src/id3/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ pub mod v2;

use crate::error::{ErrorKind, LoftyError, Result};
use crate::macros::try_vec;
use crate::util::text::utf8_decode_str;
use v2::header::Id3v2Header;

use std::io::{Read, Seek, SeekFrom};
Expand All @@ -30,7 +31,7 @@ where
if &lyrics3v2[7..] == b"LYRICS200" {
header = Some(());

let lyrics_size = std::str::from_utf8(&lyrics3v2[..7])?;
let lyrics_size = utf8_decode_str(&lyrics3v2[..7])?;
let lyrics_size = lyrics_size.parse::<u32>().map_err(|_| {
LoftyError::new(ErrorKind::TextDecode(
"Lyrics3v2 tag has an invalid size string",
Expand Down
3 changes: 2 additions & 1 deletion src/id3/v2/frame/header.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::error::{Id3v2Error, Id3v2ErrorKind, Result};
use crate::id3::v2::util::synchsafe::SynchsafeInteger;
use crate::id3::v2::util::upgrade::{upgrade_v2, upgrade_v3};
use crate::id3::v2::FrameId;
use crate::util::text::utf8_decode_str;

use std::borrow::Cow;
use std::io::Read;
Expand Down Expand Up @@ -74,7 +75,7 @@ where
}

let id_bytes = &header[..id_end];
let id_str = std::str::from_utf8(id_bytes)
let id_str = utf8_decode_str(id_bytes)
.map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadFrameId(id_bytes.to_vec())))?;

// Now upgrade the FrameId
Expand Down
6 changes: 3 additions & 3 deletions src/id3/v2/items/ownership_frame.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::error::{ErrorKind, Id3v2Error, Id3v2ErrorKind, LoftyError, Result};
use crate::util::text::{decode_text, encode_text, utf8_decode, TextEncoding};
use crate::util::text::{decode_text, encode_text, utf8_decode_str, TextEncoding};

use std::hash::Hash;
use std::io::Read;
Expand Down Expand Up @@ -47,10 +47,10 @@ impl OwnershipFrame {
.ok_or_else(|| LoftyError::new(ErrorKind::TextDecode("Found invalid encoding")))?;
let price_paid = decode_text(reader, TextEncoding::Latin1, true)?.content;

let mut date_bytes = vec![0u8; 8];
let mut date_bytes = [0u8; 8];
reader.read_exact(&mut date_bytes)?;

let date_of_purchase = utf8_decode(date_bytes)?;
let date_of_purchase = utf8_decode_str(&date_bytes)?.to_owned();

let seller = decode_text(reader, encoding, false)?.content;

Expand Down
4 changes: 1 addition & 3 deletions src/iff/chunk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,7 @@ impl<B: ByteOrder> Chunks<B> {
let cont = self.content(data)?;
self.correct_position(data)?;

let value_str = std::str::from_utf8(&cont)?;

Ok(value_str.trim_end_matches('\0').to_string())
utf8_decode(cont)
}

pub fn read_pstring<R>(&mut self, data: &mut R, size: Option<u32>) -> Result<String>
Expand Down
7 changes: 4 additions & 3 deletions src/iff/wav/tag/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use super::RIFFInfoList;
use crate::error::Result;
use crate::iff::chunk::Chunks;
use crate::macros::decode_err;
use crate::util::text::utf8_decode_str;

use std::io::{Read, Seek};

Expand All @@ -17,15 +18,15 @@ where
R: Read + Seek,
{
while data.stream_position()? != end && chunks.next(data).is_ok() {
let key_str = String::from_utf8(chunks.fourcc.to_vec())
let key_str = utf8_decode_str(&chunks.fourcc)
.map_err(|_| decode_err!(Wav, "Non UTF-8 item key found in RIFF INFO"))?;

if !verify_key(&key_str) {
if !verify_key(key_str) {
decode_err!(@BAIL Wav, "RIFF INFO item key contains invalid characters");
}

tag.items.push((
key_str,
key_str.to_owned(),
chunks
.read_cstring(data)
.map_err(|_| decode_err!(Wav, "Failed to read RIFF INFO item value"))?,
Expand Down
3 changes: 2 additions & 1 deletion src/mp4/atom_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::macros::{err, try_vec};
use crate::probe::ParsingMode;
use crate::tag::item::ItemKey;
use crate::tag::TagType;
use crate::util::text::utf8_decode;

use std::borrow::Cow;
use std::io::{Read, Seek, SeekFrom};
Expand Down Expand Up @@ -247,7 +248,7 @@ where
let mut content = try_vec![0; (len - 12) as usize];
data.read_exact(&mut content)?;

String::from_utf8(content).map_err(|_| {
utf8_decode(content).map_err(|_| {
LoftyError::new(ErrorKind::BadAtom(
"Found a non UTF-8 string while reading freeform identifier",
))
Expand Down
6 changes: 4 additions & 2 deletions src/mp4/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use crate::error::{ErrorKind, LoftyError, Result};
use crate::macros::{decode_err, err};
use crate::probe::{ParseOptions, ParsingMode};
use crate::traits::SeekStreamLen;
use crate::util::text::utf8_decode_str;

use std::io::{Read, Seek, SeekFrom};

Expand Down Expand Up @@ -163,12 +164,13 @@ where
decode_err!(@BAIL Mp4, "\"ftyp\" atom too short");
}

let mut major_brand = vec![0; 4];
let mut major_brand = [0u8; 4];
reader.read_exact(&mut major_brand)?;

reader.seek(SeekFrom::Current((atom.len - 12) as i64))?;

String::from_utf8(major_brand)
utf8_decode_str(&major_brand)
.map(ToOwned::to_owned)
.map_err(|_| LoftyError::new(ErrorKind::BadAtom("Unable to parse \"ftyp\"'s major brand")))
}

Expand Down
6 changes: 3 additions & 3 deletions src/ogg/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::error::{ErrorKind, LoftyError, Result};
use crate::macros::{decode_err, err, parse_mode_choice};
use crate::picture::{MimeType, Picture, PictureInformation, PictureType};
use crate::probe::ParsingMode;
use crate::util::text::{utf16_decode, utf8_decode};
use crate::util::text::{utf16_decode, utf8_decode, utf8_decode_str};

use std::borrow::Cow;
use std::io::{Read, Seek, SeekFrom};
Expand Down Expand Up @@ -149,8 +149,8 @@ where
// SAFETY: We just verified that all of the bytes fall within the subset of ASCII
let key = unsafe { String::from_utf8_unchecked(k.to_vec()) };

match utf8_decode(value.to_vec()) {
Ok(value) => tag.items.push((key, value)),
match utf8_decode_str(value) {
Ok(value) => tag.items.push((key, value.to_owned())),
Err(e) => {
if parse_mode == ParsingMode::Strict {
return Err(e);
Expand Down
8 changes: 4 additions & 4 deletions src/picture.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::error::{ErrorKind, LoftyError, Result};
use crate::macros::err;
use crate::probe::ParsingMode;
use crate::util::text::utf8_decode;
use crate::util::text::utf8_decode_str;

use std::borrow::Cow;
use std::fmt::{Debug, Formatter};
Expand Down Expand Up @@ -672,7 +672,7 @@ impl Picture {
err!(SizeMismatch);
}

let mime_type_str = std::str::from_utf8(&content[8..8 + mime_len])?;
let mime_type_str = utf8_decode_str(&content[8..8 + mime_len])?;
size -= mime_len;

reader.seek(SeekFrom::Current(mime_len as i64))?;
Expand All @@ -684,8 +684,8 @@ impl Picture {
if desc_len > 0 && desc_len < size {
let pos = 12 + mime_len;

if let Ok(desc) = utf8_decode(content[pos..pos + desc_len].to_vec()) {
description = Some(desc.into());
if let Ok(desc) = utf8_decode_str(&content[pos..pos + desc_len]) {
description = Some(desc.to_owned().into());
}

size -= desc_len;
Expand Down
10 changes: 10 additions & 0 deletions src/util/text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,12 @@ pub(crate) fn utf8_decode(bytes: Vec<u8>) -> Result<String> {
.map_err(Into::into)
}

pub(crate) fn utf8_decode_str(bytes: &[u8]) -> Result<&str> {
std::str::from_utf8(bytes)
.map(trim_end_nulls_str)
.map_err(Into::into)
}

pub(crate) fn utf16_decode(words: &[u16]) -> Result<String> {
String::from_utf16(words)
.map(|mut text| {
Expand Down Expand Up @@ -242,6 +248,10 @@ pub(crate) fn trim_end_nulls(text: &mut String) {
}
}

/// Returns `text` with every trailing NUL (`'\0'`) character removed.
///
/// Only NULs at the very end are stripped; leading or interior NULs are kept.
/// The result is a sub-slice of `text`.
pub(crate) fn trim_end_nulls_str(text: &str) -> &str {
    let mut trimmed = text;
    // Peel one trailing NUL per iteration until the text no longer ends with one.
    while let Some(rest) = trimmed.strip_suffix('\0') {
        trimmed = rest;
    }
    trimmed
}

fn utf16_encode(
text: &str,
endianness: fn(u16) -> [u8; 2],
Expand Down

0 comments on commit 9985a55

Please sign in to comment.