diff --git a/src/cdef.rs b/src/cdef.rs index 95ac24fea..dcd8133c1 100644 --- a/src/cdef.rs +++ b/src/cdef.rs @@ -10,9 +10,9 @@ use crate::src::cpu::CpuFlags; use crate::src::disjoint_mut::DisjointMut; use crate::src::ffi_safe::FFISafe; use crate::src::pic_or_buf::PicOrBuf; -use crate::src::pixels::Pixels; use crate::src::strided::Strided as _; use crate::src::tables::dav1d_cdef_directions; +use crate::src::with_offset::WithOffset; use crate::src::wrap_fn_ptr::wrap_fn_ptr; use bitflags::bitflags; use libc::ptrdiff_t; @@ -54,11 +54,12 @@ wrap_fn_ptr!(pub unsafe extern "C" fn cdef( edges: CdefEdgeFlags, bitdepth_max: c_int, _dst: *const FFISafe, - _top: *const FFISafe>>, + _top: *const FFISafe>, _bottom: *const FFISafe>, ) -> ()); -pub type CdefBottom<'a> = PicOrBuf<'a, AlignedVec64>; +pub type CdefTop<'a> = WithOffset<&'a DisjointMut>>; +pub type CdefBottom<'a> = WithOffset>>; impl cdef::Fn { /// CDEF operates entirely on pre-filter data. @@ -70,10 +71,8 @@ impl cdef::Fn { &self, dst: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - top: &DisjointMut>, - top_off: usize, + top: CdefTop<'_>, bottom: CdefBottom<'_>, - bottom_off: usize, pri_strength: c_int, sec_strength: u8, dir: c_int, @@ -84,12 +83,9 @@ impl cdef::Fn { let dst_ptr = dst.as_mut_ptr::().cast(); let stride = dst.stride(); let left = ptr::from_ref(left).cast(); - let top_ptr = (&*top.element_as(top_off) as *const BD::Pixel).cast(); - let bottom_ptr = match bottom { - PicOrBuf::Pic(pic) => pic.as_ptr_at::(bottom_off).cast(), - PicOrBuf::Buf(buf) => (&*buf.element_as(bottom_off) as *const BD::Pixel).cast(), - }; - let top = FFISafe::new(top); + let top_ptr = top.as_ptr::().cast(); + let bottom_ptr = bottom.as_ptr::().cast(); + let top = FFISafe::new(&top); let bottom = FFISafe::new(&bottom); let sec_strength = sec_strength as c_int; let damping = damping as c_int; @@ -172,16 +168,14 @@ fn padding( tmp: &mut [i16; TMP_STRIDE * TMP_STRIDE], src: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - top: &DisjointMut>, - top_off: usize, + top: CdefTop<'_>, bottom: CdefBottom<'_>, - bottom_off: usize, w: usize, h: usize, edges: CdefEdgeFlags, ) { - let top_off = top_off - 2; - let bottom_off = bottom_off - 2; + let top = top - 2_usize; + let bottom = bottom - 2_usize; let stride = src.pixel_stride::(); // Fill extended input buffer. @@ -207,8 +201,8 @@ fn padding( } for (i, y) in (y_start..2).enumerate() { - let offset = top_off.wrapping_add_signed(i as isize * stride); - let top = top.slice_as::<_, BD::Pixel>((offset.., ..x_end)); + let top = top + i as isize * stride; + let top = top.data.slice_as::<_, BD::Pixel>((top.offset.., ..x_end)); for x in x_start..x_end { tmp[x + y * TMP_STRIDE] = top[x].as_::(); } @@ -228,12 +222,12 @@ fn padding( } for (i, y) in (h + 2..y_end).enumerate() { let tmp = &mut tmp[y * TMP_STRIDE..]; - let bottom_off = bottom_off.wrapping_add_signed(i as isize * stride); + let bottom = bottom + i as isize * stride; // This is a fallback `fn`, so perf is not as important here, so an extra branch // here should be okay. - let bottom = match bottom { - PicOrBuf::Pic(pic) => &*pic.slice::((bottom_off.., ..x_end)), - PicOrBuf::Buf(buf) => &*buf.slice_as((bottom_off.., ..x_end)), + let bottom = match bottom.data { + PicOrBuf::Pic(pic) => &*pic.slice::((bottom.offset.., ..x_end)), + PicOrBuf::Buf(buf) => &*buf.slice_as((bottom.offset.., ..x_end)), }; for x in x_start..x_end { tmp[x] = bottom[x].as_::(); @@ -245,10 +239,8 @@ fn padding( fn cdef_filter_block_rust( dst: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - top: &DisjointMut>, - top_off: usize, + top: CdefTop<'_>, bottom: CdefBottom<'_>, - bottom_off: usize, pri_strength: c_int, sec_strength: c_int, dir: c_int, @@ -263,9 +255,7 @@ fn cdef_filter_block_rust( assert!((w == 4 || w == 8) && (h == 4 || h == 8)); let mut tmp = [0; TMP_STRIDE * TMP_STRIDE]; // `12 * 12` is the maximum value of `TMP_STRIDE * (h + 4)`. - padding::( - &mut tmp, dst, left, top, top_off, bottom, bottom_off, w, h, edges, - ); + padding::(&mut tmp, dst, left, top, bottom, w, h, edges); let tmp = tmp; let tmp_offset = 2 * TMP_STRIDE + 2; @@ -385,8 +375,8 @@ unsafe extern "C" fn cdef_filter_block_c_erased; 8], - top_ptr: *const DynPixel, - bottom_ptr: *const DynPixel, + _top_ptr: *const DynPixel, + _bottom_ptr: *const DynPixel, pri_strength: c_int, sec_strength: c_int, dir: c_int, @@ -394,7 +384,7 @@ unsafe extern "C" fn cdef_filter_block_c_erased, - top: *const FFISafe>>, + top: *const FFISafe>, bottom: *const FFISafe>, ) { // SAFETY: Was passed as `FFISafe::new(_)` in `cdef_dir::Fn::call`. @@ -402,25 +392,15 @@ unsafe extern "C" fn cdef_filter_block_c_erased().cast_const(); - // SAFETY: Reverse of what was done in `cdef::Fn::call`. `top_ptr` is - // derived from `top` and so is safe to calculate the offset from. - let top_off = unsafe { top_ptr.cast::().offset_from(top_base) } as usize; + let top = *unsafe { FFISafe::get(top) }; // SAFETY: Was passed as `FFISafe::new(_)` in `cdef::Fn::call`. let bottom = *unsafe { FFISafe::get(bottom) }; - let bottom_base = bottom.as_ptr::(); - // SAFETY: Reverse of what was done in `cdef::Fn::call`. `bottom_ptr` is - // derived from `bottom` and so is safe to calculate the offset from. - let bottom_off = unsafe { bottom_ptr.cast::().offset_from(bottom_base) } as usize; let bd = BD::from_c(bitdepth_max); cdef_filter_block_rust( dst, left, top, - top_off, bottom, - bottom_off, pri_strength, sec_strength, dir, @@ -571,7 +551,7 @@ unsafe extern "C" fn cdef_filter_neon_erased< edges: CdefEdgeFlags, bitdepth_max: c_int, _dst: *const FFISafe, - _top: *const FFISafe>>, + _top: *const FFISafe>, _bottom: *const FFISafe>, ) { use crate::src::align::Align16; diff --git a/src/cdef_apply.rs b/src/cdef_apply.rs index e6ad0644a..9986a0a1d 100644 --- a/src/cdef_apply.rs +++ b/src/cdef_apply.rs @@ -15,6 +15,7 @@ use crate::src::internal::Rav1dTaskContext; use crate::src::pic_or_buf::PicOrBuf; use crate::src::strided::Strided as _; use crate::src::strided::WithStride; +use crate::src::with_offset::WithOffset; use bitflags::bitflags; use libc::ptrdiff_t; use std::cmp; @@ -269,30 +270,30 @@ pub(crate) fn rav1d_cdef_brow( None } else if sbrow_start && by == by_start { let top = if resize { - ( - &f.lf.cdef_line_buf, - f.lf.cdef_lpf_line[0].wrapping_add_signed( + WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_lpf_line[0].wrapping_add_signed( ((sby - 1) * 4) as isize * y_stride + (bx * 4) as isize, ), - ) + } } else { - ( - &f.lf.lr_line_buf, - f.lf.lr_lpf_line[0].wrapping_add_signed( + WithOffset { + data: &f.lf.lr_line_buf, + offset: f.lf.lr_lpf_line[0].wrapping_add_signed( (sby * (4 << sb128) - 4) as isize * y_stride + (bx * 4) as isize, ), - ) + } }; let bottom = bptrs[0] + (8 * y_stride); - Some((top, PicOrBuf::Pic(bottom.data), bottom.offset)) + Some((top, WithOffset::pic(bottom))) } else if !sbrow_start && by + 2 >= by_end { - let top = ( - &f.lf.cdef_line_buf, - f.lf.cdef_line[tf as usize][0].wrapping_add_signed( + let top = WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_line[tf as usize][0].wrapping_add_signed( (sby * 4) as isize * y_stride + (bx * 4) as isize, ), - ); + }; let (buf, offset) = if resize { ( &f.lf.cdef_line_buf, @@ -311,26 +312,28 @@ pub(crate) fn rav1d_cdef_brow( }; Some(( top, - PicOrBuf::Buf(WithStride { - buf, - stride: y_stride, - }), - offset, + WithOffset { + data: PicOrBuf::Buf(WithStride { + buf, + stride: y_stride, + }), + offset, + }, )) } else { None }; - let ((top, top_off), bot, bot_off) = top_bot.unwrap_or_else(|| { - let top = ( - &f.lf.cdef_line_buf, - f.lf.cdef_line[tf as usize][0].wrapping_add_signed( + let (top, bot) = top_bot.unwrap_or_else(|| { + let top = WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_line[tf as usize][0].wrapping_add_signed( have_tt as isize * (sby * 4) as isize * y_stride + (bx * 4) as isize, ), - ); + }; let bottom = bptrs[0] + (8 * y_stride); - (top, PicOrBuf::Pic(bottom.data), bottom.offset) + (top, WithOffset::pic(bottom)) }); if y_pri_lvl != 0 { @@ -340,9 +343,7 @@ pub(crate) fn rav1d_cdef_brow( bptrs[0], &lr_bak[bit as usize][0], top, - top_off, bot, - bot_off, adj_y_pri_lvl, y_sec_lvl, dir, @@ -356,9 +357,7 @@ pub(crate) fn rav1d_cdef_brow( bptrs[0], &lr_bak[bit as usize][0], top, - top_off, bot, - bot_off, 0, y_sec_lvl, 0, @@ -381,33 +380,34 @@ pub(crate) fn rav1d_cdef_brow( None } else if sbrow_start && by == by_start { let top = if resize { - ( - &f.lf.cdef_line_buf, - f.lf.cdef_lpf_line[pl].wrapping_add_signed( + WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_lpf_line[pl].wrapping_add_signed( ((sby - 1) * 4) as isize * uv_stride + (bx * 4 >> ss_hor) as isize, ), - ) + } } else { let line = sby * (4 << sb128) - 4; - ( - &f.lf.lr_line_buf, - f.lf.lr_lpf_line[pl].wrapping_add_signed( + WithOffset { + data: &f.lf.lr_line_buf, + offset: f.lf.lr_lpf_line[pl].wrapping_add_signed( line as isize * uv_stride + (bx * 4 >> ss_hor) as isize, ), - ) + } }; let bottom = bptrs[pl] + ((8 >> ss_ver) * uv_stride); - Some((top, PicOrBuf::Pic(bottom.data), bottom.offset)) + Some((top, WithOffset::pic(bottom))) } else if !sbrow_start && by + 2 >= by_end { - let top = ( - &f.lf.cdef_line_buf, - f.lf.cdef_line[tf as usize][pl].wrapping_add_signed( - (sby * 8) as isize * uv_stride - + (bx * 4 >> ss_hor) as isize, - ), - ); + let top = WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_line[tf as usize][pl] + .wrapping_add_signed( + (sby * 8) as isize * uv_stride + + (bx * 4 >> ss_hor) as isize, + ), + }; let (buf, offset) = if resize { ( &f.lf.cdef_line_buf, @@ -428,35 +428,36 @@ pub(crate) fn rav1d_cdef_brow( }; Some(( top, - PicOrBuf::Buf(WithStride { - buf, - stride: uv_stride, - }), - offset, + WithOffset { + data: PicOrBuf::Buf(WithStride { + buf, + stride: uv_stride, + }), + offset, + }, )) } else { None }; - let ((top, top_off), bot, bot_off) = top_bot.unwrap_or_else(|| { - let top = ( - &f.lf.cdef_line_buf, - f.lf.cdef_line[tf as usize][pl].wrapping_add_signed( - have_tt as isize * (sby * 8) as isize * uv_stride - + (bx * 4 >> ss_hor) as isize, - ), - ); + let (top, bot) = top_bot.unwrap_or_else(|| { + let top = WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_line[tf as usize][pl] + .wrapping_add_signed( + have_tt as isize * (sby * 8) as isize * uv_stride + + (bx * 4 >> ss_hor) as isize, + ), + }; let bottom = bptrs[pl] + ((8 >> ss_ver) * uv_stride); - (top, PicOrBuf::Pic(bottom.data), bottom.offset) + (top, WithOffset::pic(bottom)) }); f.dsp.cdef.fb[uv_idx as usize].call::( bptrs[pl], &lr_bak[bit as usize][pl], top, - top_off, bot, - bot_off, uv_pri_lvl.into(), uv_sec_lvl, uvdir,