From 3fba038c763e9b89badd191ab692661f99e37a2e Mon Sep 17 00:00:00 2001 From: Nicole LeGare Date: Tue, 27 Feb 2024 16:35:48 -0800 Subject: [PATCH] `Rav1dFrameContext_lf::cdef_line_buf`: Make into `AlignedVec32` --- src/align.rs | 1 + src/decode.rs | 96 ++++++++++++++++++++----------------------------- src/internal.rs | 5 +-- src/lib.rs | 7 +++- 4 files changed, 46 insertions(+), 63 deletions(-) diff --git a/src/align.rs b/src/align.rs index 0320e25da..00d5f46a4 100644 --- a/src/align.rs +++ b/src/align.rs @@ -205,4 +205,5 @@ impl Default for AlignedVec { } } +pub type AlignedVec32 = AlignedVec>; pub type AlignedVec64 = AlignedVec>; diff --git a/src/decode.rs b/src/decode.rs index d1043d67b..5adc42b79 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -4387,74 +4387,54 @@ pub(crate) unsafe fn rav1d_decode_frame_init( let mut uv_stride = f.cur.stride[1]; let has_resize = (frame_hdr.size.width[0] != frame_hdr.size.width[1]) as c_int; let need_cdef_lpf_copy = (c.tc.len() > 1 && has_resize != 0) as c_int; - if y_stride * f.sbh as isize * 4 != f.lf.cdef_buf_plane_sz[0] as isize - || uv_stride * f.sbh as isize * 8 != f.lf.cdef_buf_plane_sz[1] as isize - || need_cdef_lpf_copy != f.lf.need_cdef_lpf_copy - || f.sbh != f.lf.cdef_buf_sbh - { - rav1d_free_aligned(f.lf.cdef_line_buf as *mut c_void); - let mut alloc_sz: usize = 64; - alloc_sz += (y_stride.unsigned_abs() * 4 * f.sbh as usize) << need_cdef_lpf_copy; - alloc_sz += (uv_stride.unsigned_abs() * 8 * f.sbh as usize) << need_cdef_lpf_copy; - f.lf.cdef_line_buf = rav1d_alloc_aligned(alloc_sz, 32) as *mut u8; - let mut ptr = f.lf.cdef_line_buf; - if ptr.is_null() { - f.lf.cdef_buf_plane_sz[1] = 0; - f.lf.cdef_buf_plane_sz[0] = f.lf.cdef_buf_plane_sz[1]; - return Err(ENOMEM); - } + let mut alloc_sz: usize = 64; + alloc_sz += (y_stride.unsigned_abs() * 4 * f.sbh as usize) << need_cdef_lpf_copy; + alloc_sz += (uv_stride.unsigned_abs() * 8 * f.sbh as usize) << need_cdef_lpf_copy; + // TODO: Fallible allocation. 
We need to do the following on allocation + // failure: + // f.lf.cdef_buf_plane_sz = [0, 0]; + f.lf.cdef_line_buf.resize(alloc_sz, 0); + let mut ptr = f.lf.cdef_line_buf.as_mut_ptr(); + + ptr = ptr.offset(32); + if y_stride < 0 { + f.lf.cdef_line[0][0] = ptr.offset(-(y_stride * (f.sbh as isize * 4 - 1))) as *mut DynPixel; + f.lf.cdef_line[1][0] = ptr.offset(-(y_stride * (f.sbh as isize * 4 - 3))) as *mut DynPixel; + } else { + f.lf.cdef_line[0][0] = ptr.offset(y_stride * 0) as *mut DynPixel; + f.lf.cdef_line[1][0] = ptr.offset(y_stride * 2) as *mut DynPixel; + } + ptr = ptr.offset(y_stride.abs() * f.sbh as isize * 4); + if uv_stride < 0 { + f.lf.cdef_line[0][1] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 1))) as *mut DynPixel; + f.lf.cdef_line[0][2] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 3))) as *mut DynPixel; + f.lf.cdef_line[1][1] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 5))) as *mut DynPixel; + f.lf.cdef_line[1][2] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 7))) as *mut DynPixel; + } else { + f.lf.cdef_line[0][1] = ptr.offset(uv_stride * 0) as *mut DynPixel; + f.lf.cdef_line[0][2] = ptr.offset(uv_stride * 2) as *mut DynPixel; + f.lf.cdef_line[1][1] = ptr.offset(uv_stride * 4) as *mut DynPixel; + f.lf.cdef_line[1][2] = ptr.offset(uv_stride * 6) as *mut DynPixel; + } - ptr = ptr.offset(32); + if need_cdef_lpf_copy != 0 { + ptr = ptr.offset(uv_stride.abs() * f.sbh as isize * 8); if y_stride < 0 { - f.lf.cdef_line[0][0] = + f.lf.cdef_lpf_line[0] = ptr.offset(-(y_stride * (f.sbh as isize * 4 - 1))) as *mut DynPixel; - f.lf.cdef_line[1][0] = - ptr.offset(-(y_stride * (f.sbh as isize * 4 - 3))) as *mut DynPixel; } else { - f.lf.cdef_line[0][0] = ptr.offset(y_stride * 0) as *mut DynPixel; - f.lf.cdef_line[1][0] = ptr.offset(y_stride * 2) as *mut DynPixel; + f.lf.cdef_lpf_line[0] = ptr as *mut DynPixel; } ptr = ptr.offset(y_stride.abs() * f.sbh as isize * 4); if uv_stride < 0 { - f.lf.cdef_line[0][1] = + f.lf.cdef_lpf_line[1] = + 
ptr.offset(-(uv_stride * (f.sbh as isize * 4 - 1))) as *mut DynPixel; + f.lf.cdef_lpf_line[2] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 1))) as *mut DynPixel; - f.lf.cdef_line[0][2] = - ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 3))) as *mut DynPixel; - f.lf.cdef_line[1][1] = - ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 5))) as *mut DynPixel; - f.lf.cdef_line[1][2] = - ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 7))) as *mut DynPixel; } else { - f.lf.cdef_line[0][1] = ptr.offset(uv_stride * 0) as *mut DynPixel; - f.lf.cdef_line[0][2] = ptr.offset(uv_stride * 2) as *mut DynPixel; - f.lf.cdef_line[1][1] = ptr.offset(uv_stride * 4) as *mut DynPixel; - f.lf.cdef_line[1][2] = ptr.offset(uv_stride * 6) as *mut DynPixel; + f.lf.cdef_lpf_line[1] = ptr as *mut DynPixel; + f.lf.cdef_lpf_line[2] = ptr.offset(uv_stride * f.sbh as isize * 4) as *mut DynPixel; } - - if need_cdef_lpf_copy != 0 { - ptr = ptr.offset(uv_stride.abs() * f.sbh as isize * 8); - if y_stride < 0 { - f.lf.cdef_lpf_line[0] = - ptr.offset(-(y_stride * (f.sbh as isize * 4 - 1))) as *mut DynPixel; - } else { - f.lf.cdef_lpf_line[0] = ptr as *mut DynPixel; - } - ptr = ptr.offset(y_stride.abs() * f.sbh as isize * 4); - if uv_stride < 0 { - f.lf.cdef_lpf_line[1] = - ptr.offset(-(uv_stride * (f.sbh as isize * 4 - 1))) as *mut DynPixel; - f.lf.cdef_lpf_line[2] = - ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 1))) as *mut DynPixel; - } else { - f.lf.cdef_lpf_line[1] = ptr as *mut DynPixel; - f.lf.cdef_lpf_line[2] = ptr.offset(uv_stride * f.sbh as isize * 4) as *mut DynPixel; - } - } - - f.lf.cdef_buf_plane_sz[0] = y_stride as c_int * f.sbh * 4; - f.lf.cdef_buf_plane_sz[1] = uv_stride as c_int * f.sbh * 8; - f.lf.need_cdef_lpf_copy = need_cdef_lpf_copy; - f.lf.cdef_buf_sbh = f.sbh; } let sb128 = seq_hdr.sb128; diff --git a/src/internal.rs b/src/internal.rs index 5a5e7b0eb..f070472ff 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -428,14 +428,12 @@ pub struct Rav1dFrameContext_lf { pub 
level: Vec<[u8; 4]>, pub mask: Vec, /* len = w*h */ pub lr_mask: Vec, - pub cdef_buf_plane_sz: [c_int; 2], /* stride*sbh*4 */ - pub cdef_buf_sbh: c_int, pub lr_buf_plane_sz: [c_int; 2], /* (stride*sbh*4) << sb128 if n_tc > 1, else stride*4 */ pub lim_lut: Align16, pub last_sharpness: c_int, pub lvl: [[[[u8; 2]; 8]; 4]; 8], /* [8 seg_id][4 dir][8 ref][2 is_gmv] */ pub tx_lpf_right_edge: Vec, /* len = h*2 */ - pub cdef_line_buf: *mut u8, + pub cdef_line_buf: AlignedVec32, /* AlignedVec32 */ pub lr_line_buf: *mut u8, pub cdef_line: [[*mut DynPixel; 3]; 2], /* [2 pre/post][3 plane] */ pub cdef_lpf_line: [*mut DynPixel; 3], /* plane */ @@ -444,7 +442,6 @@ pub struct Rav1dFrameContext_lf { // in-loop filter per-frame state keeping pub start_of_tile_row: *mut u8, pub start_of_tile_row_sz: c_int, - pub need_cdef_lpf_copy: c_int, pub p: [*mut DynPixel; 3], pub sr_p: [*mut DynPixel; 3], pub restore_planes: c_int, // enum LrRestorePlanes diff --git a/src/lib.rs b/src/lib.rs index df7f840a1..998b84076 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -325,6 +325,11 @@ pub(crate) unsafe fn rav1d_open(c_out: &mut *mut Rav1dContext, s: &Rav1dSettings f.task_thread.pending_tasks = Default::default(); } (&mut f.task_thread.ttd as *mut Arc).write(Arc::clone(&(*c).task_thread)); + addr_of_mut!(f.lf.level).write(Default::default()); + addr_of_mut!(f.lf.mask).write(Default::default()); + addr_of_mut!(f.lf.lr_mask).write(Default::default()); + addr_of_mut!(f.lf.tx_lpf_right_edge).write(Default::default()); + addr_of_mut!(f.lf.cdef_line_buf).write(Default::default()); f.lf.last_sharpness = -(1 as c_int); rav1d_refmvs_init(&mut f.rf); n = n.wrapping_add(1); @@ -903,7 +908,7 @@ impl Drop for Rav1dContext { let _ = mem::take(&mut f.lf.tx_lpf_right_edge); // TODO: remove when context is owned free(f.lf.start_of_tile_row as *mut c_void); rav1d_refmvs_clear(&mut f.rf); - rav1d_free_aligned(f.lf.cdef_line_buf as *mut c_void); + let _ = mem::take(&mut f.lf.cdef_line_buf); // TODO: remove when context 
is owned rav1d_free_aligned(f.lf.lr_line_buf as *mut c_void); n_1 = n_1.wrapping_add(1); }