diff --git a/src/align.rs b/src/align.rs index 0cc79a08d..0320e25da 100644 --- a/src/align.rs +++ b/src/align.rs @@ -47,6 +47,12 @@ impl_ArrayDefault!(i16); impl_ArrayDefault!(i32); impl_ArrayDefault!(u16); +pub trait AlignedByteChunk +where + Self: Sized, +{ +} + macro_rules! def_align { ($align:literal, $name:ident) => { #[derive(Clone, Copy)] @@ -84,6 +90,8 @@ macro_rules! def_align { ::default() } } + + impl AlignedByteChunk for $name<[u8; $align]> {} }; } @@ -95,19 +103,25 @@ def_align!(16, Align16); def_align!(32, Align32); def_align!(64, Align64); -/// A [`Vec`] that uses a 64-byte aligned allocation. +/// A [`Vec`] that uses [`mem::size_of`]`::()` aligned allocations. /// /// Only works with [`Copy`] types so that we don't have to handle drop logic. -pub struct AlignedVec64 { - inner: Vec>>, +pub struct AlignedVec { + inner: Vec>, /// The number of `T`s in [`Self::inner`] currently initialized. len: usize, _phantom: PhantomData, } -impl AlignedVec64 { +impl AlignedVec { + /// Must check in all constructors. + const fn check_byte_chunk_type_is_aligned() { + assert!(mem::size_of::() == mem::align_of::()); + } + pub const fn new() -> Self { + Self::check_byte_chunk_type_is_aligned(); Self { inner: Vec::new(), len: 0, @@ -147,13 +161,13 @@ impl AlignedVec64 { // Resize the underlying vector to have enough chunks for the new length. // - // NOTE: We don't need to `drop` any elements if the `Vec` is truncated since - // `T: Copy`. + // NOTE: We don't need to `drop` any elements if the `Vec` is truncated since `T: Copy`. let new_bytes = mem::size_of::() * new_len; - let new_chunks = if (new_bytes % 64) == 0 { - new_bytes / 64 + let chunk_size = mem::size_of::(); + let new_chunks = if (new_bytes % chunk_size) == 0 { + new_bytes / chunk_size } else { - (new_bytes / 64) + 1 + (new_bytes / chunk_size) + 1 }; self.inner.resize_with(new_chunks, MaybeUninit::uninit); @@ -170,7 +184,7 @@ impl AlignedVec64 { } } -impl Deref for AlignedVec64 { +impl Deref for AlignedVec { type Target = [T]; fn deref(&self) -> &Self::Target { @@ -178,15 +192,17 @@ impl Deref for AlignedVec64 { } } -impl DerefMut for AlignedVec64 { +impl DerefMut for AlignedVec { fn deref_mut(&mut self) -> &mut Self::Target { self.as_mut_slice() } } // NOTE: Custom impl so that we don't require `T: Default`. -impl Default for AlignedVec64 { +impl Default for AlignedVec { fn default() -> Self { Self::new() } } + +pub type AlignedVec64 = AlignedVec>; diff --git a/src/internal.rs b/src/internal.rs index 2e707df32..c3e5ec475 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -424,22 +424,28 @@ pub struct CodedBlockInfo { pub txtp: [u8; 3], /* plane */ } -// TODO: Temporary `Default` impl to support using `mem::take` to manually drop -// this field. Remove once the context is fully owned and can be dropped -// normally. #[derive(Default)] #[repr(C)] pub struct Rav1dFrameContext_frame_thread { - pub next_tile_row: [c_int; 2], /* 0: reconstruction, 1: entropy */ - // indexed using t->by * f->b4_stride + t->bx + /// Indices: 0: reconstruction, 1: entropy. + pub next_tile_row: [c_int; 2], + + /// Indexed using `t.by * f.b4_stride + t.bx`. pub b: Vec, + pub cbi: Vec, - // indexed using (t->by >> 1) * (f->b4_stride >> 1) + (t->bx >> 1) - pub pal: AlignedVec64<[[u16; 8]; 3]>, /* [3 plane][8 idx] */ - // iterated over inside tile state + + /// Indexed using `(t.by >> 1) * (f.b4_stride >> 1) + (t.bx >> 1)`. + /// Inner indices are `[3 plane][8 idx]`. + pub pal: AlignedVec64<[[u16; 8]; 3]>, + + /// Iterated over inside tile state. pub pal_idx: AlignedVec64, - pub cf: AlignedVec64, // AlignedVec64 - // start offsets per tile + + /// [`AlignedVec64`]`<`[`DynCoef`]`>` + pub cf: AlignedVec64, + + /// Start offsets per tile pub tile_start_off: Vec, } diff --git a/src/lib.rs b/src/lib.rs index d8d12db54..b0efb8d26 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -33,6 +33,7 @@ use crate::src::fg_apply; use crate::src::internal::Rav1dContext; use crate::src::internal::Rav1dContextTaskThread; use crate::src::internal::Rav1dContextTaskType; +use crate::src::internal::Rav1dFrameContext_frame_thread; use crate::src::internal::Rav1dFrameData; use crate::src::internal::Rav1dTask; use crate::src::internal::Rav1dTaskContext; @@ -82,6 +83,7 @@ use std::mem; use std::mem::MaybeUninit; use std::process::abort; use std::ptr; +use std::ptr::addr_of_mut; use std::ptr::NonNull; use std::slice; use std::sync::atomic::AtomicI32; @@ -324,6 +326,7 @@ pub(crate) unsafe fn rav1d_open(c_out: &mut *mut Rav1dContext, s: &Rav1dSettings let mut n: c_uint = 0 as c_int as c_uint; while n < (*c).n_fc { let f: &mut Rav1dFrameData = &mut *((*c).fc).offset(n as isize); + addr_of_mut!(f.frame_thread).write(Default::default()); if n_tc > 1 { f.task_thread.lock = Mutex::new(()); f.task_thread.cond = Condvar::new();