Skip to content

Commit

Permalink
struct MemPool: Rewrite Rav1dMemPool as fully safe and use it for…
Browse files Browse the repository at this point in the history
… `Rav1dContext::picture_pool` (#1005)

* Fixes #641.

This rewrites from scratch the `unsafe` `Rav1dMemPool` as the safe
`MemPool`. There are a few important differences here:
* `Rav1dMemPool` used `Box<[u8]>` essentially. `MemPool` uses `Vec<T>`
(`T = u8` in this case), which saves on some reallocations, as
`Rav1dMemPool` would only reuse an allocation if it was exactly the same
size.
* By returning `Vec`s, the caller can decide how initialization is done,
as `MemPool` only does the capacity allocation.

Since these picture allocations are large, this code is
performance-critical. When running on
`./tests/large/chimera_10b_1080p.ivf -l 100`, I observed a ~5%
performance regression initially (401 ms baseline, 415 ms new). However,
if I do not zero out the data buffer (using `Vec::resize_with`, which
only initializes the new elements from the previous use), then I observe
a 5% performance improvement (376 ms). As I explain in a comment, I do
`Vec::resize_with` in `cfg!(debug_assertions)`, but in release mode, I
unsafely call `Vec::set_len`. The intermediate slices of the data that
are materialized are technically UB, since the data is uninitialized.
However, it should always be written to before read, and since this is
so performance-critical, I think this is worth it.

That said, sometimes the benchmarking seemed noisy, so someone else can
take a look, too, if they'd like.
  • Loading branch information
kkysen authored May 1, 2024
2 parents 4de250c + 5f01943 commit 4b9221e
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 156 deletions.
21 changes: 21 additions & 0 deletions include/dav1d/picture.rs
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,27 @@ pub struct Dav1dPicAllocator {
#[repr(C)]
pub(crate) struct Rav1dPicAllocator {
/// See [`Dav1dPicAllocator::cookie`].
///
/// # Safety
///
/// If [`Self::is_default`]`()`, then this cookie is a reference to
/// [`Rav1dContext::picture_pool`], a `&Arc<MemPool<MaybeUninit<u8>>`.
/// Thus, its lifetime is that of `&c.picture_pool`,
/// so the lifetime of the `&`[`Rav1dContext`].
/// This is used from `dav1d_default_picture_alloc`
/// ([`Self::default`]`().alloc_picture_callback`),
/// which is called from [`Self::alloc_picture_data`],
/// which is called further up on the call stack with a `&`[`Rav1dContext`].
/// Thus, the lifetime will always be valid where used.
///
/// Note that this is an `&Arc<MemPool<MaybeUninit<u8>>` turned into a raw pointer,
/// not an [`Arc::into_raw`] of that [`Arc`].
/// This is because storing the [`Arc`] would require C to
/// free data owned by a [`Dav1dPicAllocator`] potentially,
/// which it may not do, as there are no current APIs for doing so.
///
/// [`Rav1dContext::picture_pool`]: crate::src::internal::Rav1dContext::picture_pool
/// [`Rav1dContext`]: crate::src::internal::Rav1dContext
pub cookie: *mut c_void,

/// See [`Dav1dPicAllocator::alloc_picture_callback`].
Expand Down
4 changes: 2 additions & 2 deletions src/internal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ use crate::src::log::Rav1dLogger;
use crate::src::loopfilter::Rav1dLoopFilterDSPContext;
use crate::src::looprestoration::Rav1dLoopRestorationDSPContext;
use crate::src::mc::Rav1dMCDSPContext;
use crate::src::mem::Rav1dMemPool;
use crate::src::mem::MemPool;
use crate::src::msac::MsacContext;
use crate::src::pal::Rav1dPalDSPContext;
use crate::src::picture::PictureFlags;
Expand Down Expand Up @@ -376,7 +376,7 @@ pub struct Rav1dContext {

pub(crate) logger: Option<Rav1dLogger>,

pub(crate) picture_pool: *mut Rav1dMemPool,
pub(crate) picture_pool: Arc<MemPool<MaybeUninit<u8>>>,
}

impl Rav1dContext {
Expand Down
16 changes: 8 additions & 8 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ use crate::src::log::Rav1dLog as _;
use crate::src::mem::rav1d_alloc_aligned;
use crate::src::mem::rav1d_free_aligned;
use crate::src::mem::rav1d_freep_aligned;
use crate::src::mem::rav1d_mem_pool_end;
use crate::src::mem::rav1d_mem_pool_init;
use crate::src::obu::rav1d_parse_obus;
use crate::src::obu::rav1d_parse_sequence_header;
use crate::src::pal::rav1d_pal_dsp_init;
Expand Down Expand Up @@ -213,16 +211,18 @@ pub(crate) unsafe fn rav1d_open(c_out: &mut *mut Rav1dContext, s: &Rav1dSettings
(*c).inloop_filters = s.inloop_filters;
(*c).decode_frame_type = s.decode_frame_type;
(*c).cached_error_props = Default::default();
addr_of_mut!((*c).picture_pool).write(Default::default());
if (*c).allocator.alloc_picture_callback == dav1d_default_picture_alloc
&& (*c).allocator.release_picture_callback == dav1d_default_picture_release
{
if !((*c).allocator.cookie).is_null() {
if !(*c).allocator.cookie.is_null() {
return error(c, c_out);
}
if rav1d_mem_pool_init(&mut (*c).picture_pool).is_err() {
return error(c, c_out);
}
(*c).allocator.cookie = (*c).picture_pool as *mut c_void;
// SAFETY: When `allocator.is_default()`, `allocator.cookie` should be a `&c.picture_pool`.
// See `Rav1dPicAllocator::cookie` docs for more, including an analysis of the lifetime.
(*c).allocator.cookie = ptr::from_ref(&(*c).picture_pool)
.cast::<c_void>()
.cast_mut();
} else if (*c).allocator.alloc_picture_callback == dav1d_default_picture_alloc
|| (*c).allocator.release_picture_callback == dav1d_default_picture_release
{
Expand Down Expand Up @@ -811,7 +811,7 @@ impl Drop for Rav1dContext {
let _ = mem::take(&mut self.mastering_display);
let _ = mem::take(&mut self.content_light);
let _ = mem::take(&mut self.itut_t35);
rav1d_mem_pool_end(self.picture_pool);
let _ = mem::take(&mut self.picture_pool);
}
}
}
Expand Down
155 changes: 31 additions & 124 deletions src/mem.rs
Original file line number Diff line number Diff line change
@@ -1,30 +1,40 @@
use crate::src::error::Rav1dError::ENOMEM;
use crate::src::error::Rav1dResult;
use libc::free;
use libc::malloc;
use libc::posix_memalign;
use libc::pthread_mutex_destroy;
use libc::pthread_mutex_init;
use libc::pthread_mutex_lock;
use libc::pthread_mutex_t;
use libc::pthread_mutex_unlock;
use libc::pthread_mutexattr_t;
use libc::uintptr_t;
use std::ffi::c_int;
use std::ffi::c_void;
use std::sync::Mutex;

#[repr(C)]
pub struct Rav1dMemPool {
pub lock: pthread_mutex_t,
pub buf: *mut Rav1dMemPoolBuffer,
pub ref_cnt: c_int,
pub end: c_int,
pub struct MemPool<T> {
bufs: Mutex<Vec<Vec<T>>>,
}

#[repr(C)]
pub struct Rav1dMemPoolBuffer {
pub data: *mut c_void,
pub next: *mut Rav1dMemPoolBuffer,
impl<T> MemPool<T> {
pub const fn new() -> Self {
Self {
bufs: Mutex::new(Vec::new()),
}
}

pub fn pop(&self, size: usize) -> Vec<T> {
if let Some(mut buf) = self.bufs.lock().unwrap().pop() {
if size > buf.capacity() {
// TODO fallible allocation
buf.reserve(size - buf.len());
}
return buf;
}
// TODO fallible allocation
Vec::with_capacity(size)
}

pub fn push(&self, buf: Vec<T>) {
self.bufs.lock().unwrap().push(buf);
}
}

impl<T> Default for MemPool<T> {
fn default() -> Self {
Self::new()
}
}

#[inline]
Expand Down Expand Up @@ -52,106 +62,3 @@ pub unsafe fn rav1d_freep_aligned(ptr: *mut c_void) {
*mem = 0 as *mut c_void;
}
}

#[cold]
unsafe fn mem_pool_destroy(pool: *mut Rav1dMemPool) {
pthread_mutex_destroy(&mut (*pool).lock);
free(pool as *mut c_void);
}

pub unsafe fn rav1d_mem_pool_push(pool: *mut Rav1dMemPool, buf: *mut Rav1dMemPoolBuffer) {
pthread_mutex_lock(&mut (*pool).lock);
(*pool).ref_cnt -= 1;
let ref_cnt = (*pool).ref_cnt;
if (*pool).end == 0 {
(*buf).next = (*pool).buf;
(*pool).buf = buf;
pthread_mutex_unlock(&mut (*pool).lock);
if !(ref_cnt > 0) {
unreachable!();
}
} else {
pthread_mutex_unlock(&mut (*pool).lock);
rav1d_free_aligned((*buf).data);
if ref_cnt == 0 {
mem_pool_destroy(pool);
}
};
}

pub unsafe fn rav1d_mem_pool_pop(pool: *mut Rav1dMemPool, size: usize) -> *mut Rav1dMemPoolBuffer {
if size & ::core::mem::size_of::<*mut c_void>().wrapping_sub(1) != 0 {
unreachable!();
}
pthread_mutex_lock(&mut (*pool).lock);
let mut buf: *mut Rav1dMemPoolBuffer = (*pool).buf;
(*pool).ref_cnt += 1;
let mut data: *mut u8;
if !buf.is_null() {
(*pool).buf = (*buf).next;
pthread_mutex_unlock(&mut (*pool).lock);
data = (*buf).data as *mut u8;
if (buf as uintptr_t).wrapping_sub(data as uintptr_t) == size {
return buf;
}
rav1d_free_aligned(data as *mut c_void);
} else {
pthread_mutex_unlock(&mut (*pool).lock);
}
data = rav1d_alloc_aligned(
size.wrapping_add(::core::mem::size_of::<Rav1dMemPoolBuffer>()),
64,
) as *mut u8;
if data.is_null() {
pthread_mutex_lock(&mut (*pool).lock);
(*pool).ref_cnt -= 1;
let ref_cnt = (*pool).ref_cnt;
pthread_mutex_unlock(&mut (*pool).lock);
if ref_cnt == 0 {
mem_pool_destroy(pool);
}
return 0 as *mut Rav1dMemPoolBuffer;
}
buf = data.offset(size as isize) as *mut Rav1dMemPoolBuffer;
(*buf).data = data as *mut c_void;
return buf;
}

#[cold]
pub unsafe fn rav1d_mem_pool_init(ppool: *mut *mut Rav1dMemPool) -> Rav1dResult {
let pool: *mut Rav1dMemPool =
malloc(::core::mem::size_of::<Rav1dMemPool>()) as *mut Rav1dMemPool;
if !pool.is_null() {
if pthread_mutex_init(&mut (*pool).lock, 0 as *const pthread_mutexattr_t) == 0 {
(*pool).buf = 0 as *mut Rav1dMemPoolBuffer;
(*pool).ref_cnt = 1 as c_int;
(*pool).end = 0 as c_int;
*ppool = pool;
return Ok(());
}
free(pool as *mut c_void);
}
*ppool = 0 as *mut Rav1dMemPool;
return Err(ENOMEM);
}

#[cold]
pub unsafe fn rav1d_mem_pool_end(pool: *mut Rav1dMemPool) {
if !pool.is_null() {
pthread_mutex_lock(&mut (*pool).lock);
let mut buf: *mut Rav1dMemPoolBuffer = (*pool).buf;
(*pool).ref_cnt -= 1;
let ref_cnt = (*pool).ref_cnt;
(*pool).buf = 0 as *mut Rav1dMemPoolBuffer;
(*pool).end = 1 as c_int;
pthread_mutex_unlock(&mut (*pool).lock);
while !buf.is_null() {
let data: *mut c_void = (*buf).data;
buf = (*buf).next;
rav1d_free_aligned(data);
}
if ref_cnt == 0 {
mem_pool_destroy(pool);
}
}
}
59 changes: 37 additions & 22 deletions src/picture.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,26 +18,22 @@ use crate::include::dav1d::picture::Rav1dPictureParameters;
use crate::include::dav1d::picture::RAV1D_PICTURE_ALIGNMENT;
use crate::src::error::Dav1dResult;
use crate::src::error::Rav1dError::EGeneric;
use crate::src::error::Rav1dError::ENOMEM;
use crate::src::error::Rav1dResult;
use crate::src::internal::Rav1dContext;
use crate::src::internal::Rav1dFrameData;
use crate::src::log::Rav1dLog as _;
use crate::src::log::Rav1dLogger;
use crate::src::mem::rav1d_mem_pool_pop;
use crate::src::mem::rav1d_mem_pool_push;
use crate::src::mem::Rav1dMemPool;
use crate::src::mem::Rav1dMemPoolBuffer;
use crate::src::mem::MemPool;
use atomig::Atom;
use atomig::AtomLogic;
use bitflags::bitflags;
use libc::ptrdiff_t;
use std::ffi::c_int;
use std::ffi::c_void;
use std::mem;
use std::mem::MaybeUninit;
use std::ptr;
use std::ptr::NonNull;
use std::slice;
use std::sync::atomic::AtomicU32;
use std::sync::atomic::Ordering;
use std::sync::Arc;
Expand Down Expand Up @@ -79,11 +75,22 @@ pub(crate) struct Rav1dThreadPicture {
pub progress: Option<Arc<[AtomicU32; 2]>>,
}

struct MemPoolBuf<T> {
/// This is an [`Arc`] because a [`Rav1dPicture`] can outlive
/// its [`Rav1dContext`], and that's how the API was designed.
/// If it were changed to require [`Rav1dContext`] to outlive
/// any [`Rav1dPicture`]s it creates (a reasonable API I think,
/// and easy to do with Rust lifetimes), then we wouldn't need
/// the [`Arc`] here. But it's not the round-tripping through C
/// that requires this, just how the API was designed.
pool: Arc<MemPool<T>>,
buf: Vec<T>,
}

pub unsafe extern "C" fn dav1d_default_picture_alloc(
p_c: *mut Dav1dPicture,
cookie: *mut c_void,
) -> Dav1dResult {
assert!(::core::mem::size_of::<Rav1dMemPoolBuffer>() <= RAV1D_PICTURE_ALIGNMENT);
let p = p_c.read().to::<Rav1dPicture>();
let hbd = (p.p.bpc > 8) as c_int;
let aligned_w = p.p.w + 127 & !127;
Expand All @@ -103,15 +110,20 @@ pub unsafe extern "C" fn dav1d_default_picture_alloc(
let y_sz = (y_stride * aligned_h as isize) as usize;
let uv_sz = (uv_stride * (aligned_h >> ss_ver) as isize) as usize;
let pic_size = y_sz + 2 * uv_sz;
let buf = rav1d_mem_pool_pop(
cookie as *mut Rav1dMemPool,
pic_size + RAV1D_PICTURE_ALIGNMENT - ::core::mem::size_of::<Rav1dMemPoolBuffer>(),
);
if buf.is_null() {
return Rav1dResult::<()>::Err(ENOMEM).into();
}

let data = slice::from_raw_parts_mut((*buf).data as *mut u8, pic_size);
let pool = &*cookie.cast::<Arc<MemPool<MaybeUninit<u8>>>>();
let pool = pool.clone();
let pic_cap = pic_size + RAV1D_PICTURE_ALIGNMENT;
// TODO fallible allocation
let mut buf = pool.pop(pic_cap);
buf.resize_with(pic_cap, MaybeUninit::uninit);
// We have to `Box` this because `Dav1dPicture::allocator_data` is only 8 bytes.
let mut buf = Box::new(MemPoolBuf { pool, buf });
let data = &mut buf.buf[..];
// SAFETY: `Rav1dPicAllocator::alloc_picture_callback` requires that these are `RAV1D_PICTURE_ALIGNMENT`-aligned.
let align_offset = data.as_ptr().align_offset(RAV1D_PICTURE_ALIGNMENT);
let data = &mut data[align_offset..][..pic_size];

let (data0, data12) = data.split_at_mut(y_sz);
let (data1, data2) = data12.split_at_mut(uv_sz);
// Note that `data[1]` and `data[2]`
Expand All @@ -121,20 +133,23 @@ pub unsafe extern "C" fn dav1d_default_picture_alloc(

(*p_c).stride = stride;
(*p_c).data = data.map(NonNull::new);
(*p_c).allocator_data = NonNull::new(buf.cast());
(*p_c).allocator_data = NonNull::new(Box::into_raw(buf).cast::<c_void>());
// The caller will create the real `Rav1dPicture` from the `Dav1dPicture` fields set above,
// so we don't want to drop the `Rav1dPicture` we created for convenience here.
mem::forget(p);

Rav1dResult::Ok(()).into()
}

pub unsafe extern "C" fn dav1d_default_picture_release(p: *mut Dav1dPicture, cookie: *mut c_void) {
rav1d_mem_pool_push(
cookie as *mut Rav1dMemPool,
(*p).allocator_data
.map_or_else(ptr::null_mut, |data| data.as_ptr()) as *mut Rav1dMemPoolBuffer,
);
pub unsafe extern "C" fn dav1d_default_picture_release(p: *mut Dav1dPicture, _cookie: *mut c_void) {
let buf = (*p)
.allocator_data
.unwrap()
.as_ptr()
.cast::<MemPoolBuf<MaybeUninit<u8>>>();
let buf = Box::from_raw(buf);
let MemPoolBuf { pool, buf } = *buf;
pool.push(buf);
}

impl Default for Rav1dPicAllocator {
Expand Down

0 comments on commit 4b9221e

Please sign in to comment.