From 15c1aed9bf98e17046606e87b13ce8d06627a95d Mon Sep 17 00:00:00 2001 From: Serial <69764315+Serial-ATA@users.noreply.github.com> Date: Mon, 22 Jul 2024 12:15:28 -0400 Subject: [PATCH] MP4: Allow invalid atom sizes with `ParsingMode::Relaxed` --- CHANGELOG.md | 1 + lofty/src/mp4/atom_info.rs | 17 ++++++++++++++- lofty/src/mp4/write.rs | 44 +++++++++++++++++++++++++------------- 3 files changed, 46 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b31053290..c883cd21f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - **Timestamp**: `Timestamp::parse` with empty inputs will return `None` when not using `ParsingMode::Strict` ([PR](https://github.com/Serial-ATA/lofty-rs/pull/416)) +- **MP4**: Atoms with sizes greater than the remaining file size will be ignored with `ParsingMode::Relaxed` ([PR](https://github.com/Serial-ATA/lofty-rs/pull/433)) ### Fixed - **Fuzzing** (Thanks [@qarmin](https://github.com/qarmin)!) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/TODO)): diff --git a/lofty/src/mp4/atom_info.rs b/lofty/src/mp4/atom_info.rs index ca6c5fd05..55db62e5c 100644 --- a/lofty/src/mp4/atom_info.rs +++ b/lofty/src/mp4/atom_info.rs @@ -180,7 +180,14 @@ impl AtomInfo { // `len` includes itself and the identifier if (len - ATOM_HEADER_LEN) > reader_size { - data.seek(SeekFrom::Current(-4))?; + log::warn!("Encountered an atom with an invalid length, stopping"); + + if parse_mode == ParsingMode::Relaxed { + // Seek to the end, as we cannot gather anything else from the file + data.seek(SeekFrom::End(0))?; + return Ok(None); + } + err!(SizeMismatch); } @@ -204,6 +211,14 @@ impl AtomInfo { ident: atom_ident, })) } + + pub(crate) fn header_size(&self) -> u64 { + if !self.extended { + return ATOM_HEADER_LEN; + } + + ATOM_HEADER_LEN + 8 + } } fn parse_freeform( diff --git a/lofty/src/mp4/write.rs b/lofty/src/mp4/write.rs index b37b752d4..87a13bf37 100644 --- a/lofty/src/mp4/write.rs +++ b/lofty/src/mp4/write.rs @@ -1,13 +1,14 @@ use crate::config::ParsingMode; use crate::error::{LoftyError, Result}; +use crate::io::{FileLike, Length, Truncate}; +use crate::macros::err; use crate::mp4::atom_info::{AtomIdent, AtomInfo, IDENTIFIER_LEN}; -use crate::mp4::read::skip_unneeded; +use crate::mp4::read::{meta_is_full, skip_unneeded}; use std::cell::{RefCell, RefMut}; use std::io::{Cursor, Read, Seek, SeekFrom, Write}; use std::ops::RangeBounds; -use crate::io::{FileLike, Length, Truncate}; use byteorder::{BigEndian, WriteBytesExt}; /// A wrapper around [`AtomInfo`] that allows us to track all of the children of containers we deem important @@ -17,8 +18,10 @@ pub(super) struct ContextualAtom { pub(crate) children: Vec, } +const META_ATOM_IDENT: AtomIdent<'_> = AtomIdent::Fourcc(*b"meta"); + #[rustfmt::skip] -const IMPORTANT_CONTAINERS: [[u8; 4]; 7] = [ +const IMPORTANT_CONTAINERS: &[[u8; 4]] = &[ *b"moov", *b"udta", *b"moof", @@ -44,26 +47,37 @@ impl ContextualAtom { return Ok(None); }; - let mut children = Vec::new(); - - let AtomIdent::Fourcc(fourcc) = info.ident else { - *reader_len = reader_len.saturating_sub(info.len); - return Ok(Some(ContextualAtom { info, children })); - }; + match info.ident { + AtomIdent::Fourcc(ident) if IMPORTANT_CONTAINERS.contains(&ident) => {}, + _ => { + *reader_len = reader_len.saturating_sub(info.len); + + // We don't care about the atom's contents + skip_unneeded(reader, info.extended, info.len)?; + return Ok(Some(ContextualAtom { + info, + children: Vec::new(), + })); + }, + } - if !IMPORTANT_CONTAINERS.contains(&fourcc) { - *reader_len = reader_len.saturating_sub(info.len); + let mut len = info.len - info.header_size(); + let mut children = Vec::new(); - // We don't care about the atom's contents - skip_unneeded(reader, info.extended, info.len)?; - return Ok(Some(ContextualAtom { info, children })); + // See meta_is_full for details + if info.ident == META_ATOM_IDENT && meta_is_full(reader)? { + len -= 4; } - let mut len = info.len - 8; while let Some(child) = Self::read(reader, &mut len, parse_mode)? { children.push(child); } + if len != 0 { + // TODO: Print the container ident + err!(BadAtom("Unable to read entire container")); + } + *reader_len = reader_len.saturating_sub(info.len); // reader.seek(SeekFrom::Current(*reader_len as i64))?; // Skip any remaining bytes Ok(Some(ContextualAtom { info, children }))