diff --git a/src/cdef.rs b/src/cdef.rs index 6ba2acca8..d4847fd38 100644 --- a/src/cdef.rs +++ b/src/cdef.rs @@ -5,10 +5,14 @@ use crate::include::common::bitdepth::LeftPixelRow2px; use crate::include::common::intops::apply_sign; use crate::include::common::intops::iclip; use crate::include::dav1d::picture::Rav1dPictureDataComponentOffset; +use crate::src::align::AlignedVec64; use crate::src::cpu::CpuFlags; +use crate::src::disjoint_mut::DisjointMut; use crate::src::ffi_safe::FFISafe; +use crate::src::pic_or_buf::PicOrBuf; use crate::src::strided::Strided as _; use crate::src::tables::dav1d_cdef_directions; +use crate::src::with_offset::WithOffset; use crate::src::wrap_fn_ptr::wrap_fn_ptr; use bitflags::bitflags; use libc::ptrdiff_t; @@ -16,7 +20,6 @@ use std::cmp; use std::ffi::c_int; use std::ffi::c_uint; use std::ptr; -use std::slice; #[cfg(all( feature = "asm", @@ -42,8 +45,8 @@ wrap_fn_ptr!(pub unsafe extern "C" fn cdef( dst_ptr: *mut DynPixel, stride: ptrdiff_t, left: *const [LeftPixelRow2px; 8], - top: *const DynPixel, - bottom: *const DynPixel, + top_ptr: *const DynPixel, + bottom_ptr: *const DynPixel, pri_strength: c_int, sec_strength: c_int, dir: c_int, @@ -51,20 +54,25 @@ wrap_fn_ptr!(pub unsafe extern "C" fn cdef( edges: CdefEdgeFlags, bitdepth_max: c_int, _dst: *const FFISafe, + _top: *const FFISafe, + _bottom: *const FFISafe, ) -> ()); +pub type CdefTop<'a> = WithOffset<&'a DisjointMut>>; +pub type CdefBottom<'a> = WithOffset>>; + impl cdef::Fn { /// CDEF operates entirely on pre-filter data. /// If bottom/right edges are present (according to `edges`), /// then the pre-filter data is located in `dst`. /// However, the edge pixels above `dst` may be post-filter, /// so in order to get access to pre-filter top pixels, use `top`. - pub unsafe fn call( + pub fn call( &self, dst: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - top: *const BD::Pixel, - bottom: *const BD::Pixel, + top: CdefTop, + bottom: CdefBottom, pri_strength: c_int, sec_strength: u8, dir: c_int, @@ -75,26 +83,33 @@ impl cdef::Fn { let dst_ptr = dst.as_mut_ptr::().cast(); let stride = dst.stride(); let left = ptr::from_ref(left).cast(); - let top = top.cast(); - let bottom = bottom.cast(); + let top_ptr = top.as_ptr::().cast(); + let bottom_ptr = bottom.as_ptr::().cast(); + let top = FFISafe::new(&top); + let bottom = FFISafe::new(&bottom); let sec_strength = sec_strength as c_int; let damping = damping as c_int; let bd = bd.into_c(); let dst = FFISafe::new(&dst); - self.get()( - dst_ptr, - stride, - left, - top, - bottom, - pri_strength, - sec_strength, - dir, - damping, - edges, - bd, - dst, - ) + // SAFETY: Rust fallback is safe, asm is assumed to do the same. + unsafe { + self.get()( + dst_ptr, + stride, + left, + top_ptr, + bottom_ptr, + pri_strength, + sec_strength, + dir, + damping, + edges, + bd, + dst, + top, + bottom, + ) + } } } @@ -149,17 +164,18 @@ pub fn fill(tmp: &mut [i16], w: usize, h: usize) { } } -unsafe fn padding( +fn padding( tmp: &mut [i16; TMP_STRIDE * TMP_STRIDE], src: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - top: *const BD::Pixel, - bottom: *const BD::Pixel, + top: CdefTop, + bottom: CdefBottom, w: usize, h: usize, edges: CdefEdgeFlags, ) { - let [top, bottom] = [top, bottom].map(|it| it.sub(2)); + let top = top - 2_usize; + let bottom = bottom - 2_usize; let stride = src.pixel_stride::(); // Fill extended input buffer. @@ -185,7 +201,8 @@ unsafe fn padding( } for (i, y) in (y_start..2).enumerate() { - let top = slice::from_raw_parts(top.offset(i as isize * stride), x_end); + let top = top + i as isize * stride; + let top = top.data.slice_as::<_, BD::Pixel>((top.offset.., ..x_end)); for x in x_start..x_end { tmp[x + y * TMP_STRIDE] = top[x].as_::(); } @@ -205,7 +222,13 @@ unsafe fn padding( } for (i, y) in (h + 2..y_end).enumerate() { let tmp = &mut tmp[y * TMP_STRIDE..]; - let bottom = slice::from_raw_parts(bottom.offset(i as isize * stride), x_end); + let bottom = bottom + i as isize * stride; + // This is a fallback `fn`, so perf is not as important here, so an extra branch + // here should be okay. + let bottom = match bottom.data { + PicOrBuf::Pic(pic) => &*pic.slice::((bottom.offset.., ..x_end)), + PicOrBuf::Buf(buf) => &*buf.slice_as((bottom.offset.., ..x_end)), + }; for x in x_start..x_end { tmp[x] = bottom[x].as_::(); } @@ -213,11 +236,11 @@ unsafe fn padding( } #[inline(never)] -unsafe fn cdef_filter_block_rust( +fn cdef_filter_block_rust( dst: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - top: *const BD::Pixel, - bottom: *const BD::Pixel, + top: CdefTop, + bottom: CdefBottom, pri_strength: c_int, sec_strength: c_int, dir: c_int, @@ -347,12 +370,13 @@ unsafe fn cdef_filter_block_rust( /// # Safety /// /// Must be called by [`cdef::Fn::call`]. +#[deny(unsafe_op_in_unsafe_fn)] unsafe extern "C" fn cdef_filter_block_c_erased( _dst_ptr: *mut DynPixel, _stride: ptrdiff_t, left: *const [LeftPixelRow2px; 8], - top: *const DynPixel, - bottom: *const DynPixel, + _top_ptr: *const DynPixel, + _bottom_ptr: *const DynPixel, pri_strength: c_int, sec_strength: c_int, dir: c_int, @@ -360,13 +384,17 @@ unsafe extern "C" fn cdef_filter_block_c_erased, + top: *const FFISafe, + bottom: *const FFISafe, ) { // SAFETY: Was passed as `FFISafe::new(_)` in `cdef_dir::Fn::call`. let dst = *unsafe { FFISafe::get(dst) }; // SAFETY: Reverse of cast in `cdef::Fn::call`. let left = unsafe { &*left.cast() }; - let top = top.cast(); - let bottom = bottom.cast(); + // SAFETY: Was passed as `FFISafe::new(_)` in `cdef::Fn::call`. + let top = *unsafe { FFISafe::get(top) }; + // SAFETY: Was passed as `FFISafe::new(_)` in `cdef::Fn::call`. + let bottom = *unsafe { FFISafe::get(bottom) }; let bd = BD::from_c(bitdepth_max); cdef_filter_block_rust( dst, @@ -523,6 +551,8 @@ unsafe extern "C" fn cdef_filter_neon_erased< edges: CdefEdgeFlags, bitdepth_max: c_int, _dst: *const FFISafe, + _top: *const FFISafe, + _bottom: *const FFISafe, ) { use crate::src::align::Align16; diff --git a/src/cdef_apply.rs b/src/cdef_apply.rs index 747e59a12..88a7ceec0 100644 --- a/src/cdef_apply.rs +++ b/src/cdef_apply.rs @@ -1,3 +1,5 @@ +#![deny(unsafe_code)] + use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::BPC; use crate::include::common::intops::ulog2; @@ -10,7 +12,10 @@ use crate::src::disjoint_mut::DisjointMut; use crate::src::internal::Rav1dContext; use crate::src::internal::Rav1dFrameData; use crate::src::internal::Rav1dTaskContext; +use crate::src::pic_or_buf::PicOrBuf; use crate::src::strided::Strided as _; +use crate::src::strided::WithStride; +use crate::src::with_offset::WithOffset; use bitflags::bitflags; use libc::ptrdiff_t; use std::cmp; @@ -125,7 +130,7 @@ fn adjust_strength(strength: u8, var: c_uint) -> c_int { strength as c_int * (4 + i) + 8 >> 4 } -pub(crate) unsafe fn rav1d_cdef_brow( +pub(crate) fn rav1d_cdef_brow( c: &Rav1dContext, tc: &mut Rav1dTaskContext, f: &Rav1dFrameData, @@ -261,66 +266,67 @@ pub(crate) unsafe fn rav1d_cdef_brow( 0 }; - let mut top = 0 as *const BD::Pixel; - let mut bot = 0 as *const BD::Pixel; - let mut offset: ptrdiff_t; - let st_y: bool; - - if !have_tt { - st_y = true; + let top_bot = if !have_tt { + None } else if sbrow_start && by == by_start { - if resize { - offset = ((sby - 1) * 4) as isize * y_stride + (bx * 4) as isize; - top = &*f - .lf - .cdef_line_buf - .element_as((f.lf.cdef_lpf_line[0] as isize + offset) as usize); + let top = if resize { + WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_lpf_line[0], + } + ((sby - 1) * 4) as isize * y_stride + + (bx * 4) as isize } else { - offset = (sby * ((4 as c_int) << sb128) - 4) as isize * y_stride - + (bx * 4) as isize; - top = &*f - .lf - .lr_line_buf - .element_as((f.lf.lr_lpf_line[0] as isize + offset) as usize); - } + WithOffset { + data: &f.lf.lr_line_buf, + offset: f.lf.lr_lpf_line[0], + } + (sby * (4 << sb128) - 4) as isize * y_stride + + (bx * 4) as isize + }; let bottom = bptrs[0] + (8 * y_stride); - bot = bottom.as_ptr::(); - st_y = false; + Some((top, WithOffset::pic(bottom))) } else if !sbrow_start && by + 2 >= by_end { - offset = (sby * 4) as isize * y_stride + (bx * 4) as isize; - top = &*f.lf.cdef_line_buf.element_as( - (f.lf.cdef_line[tf as usize][0] as isize + offset) as usize, - ); - if resize { - offset = (sby * 4 + 2) as isize * y_stride + (bx * 4) as isize; - // FIXME incorrect; should be kept as an offset for later slices. - bot = &*f - .lf - .cdef_line_buf - .element_as((f.lf.cdef_lpf_line[0] as isize + offset) as usize); + let top = WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_line[tf as usize][0], + } + (sby * 4) as isize * y_stride + + (bx * 4) as isize; + let buf = if resize { + WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_lpf_line[0], + } + (sby * 4 + 2) as isize * y_stride + + (bx * 4) as isize } else { - let line = sby * ((4 as c_int) << sb128) + 4 * sb128 as c_int + 2; - offset = line as isize * y_stride + (bx * 4) as isize; - // FIXME incorrect; should be kept as an offset for later slices. - bot = &*f - .lf - .lr_line_buf - .element_as((f.lf.lr_lpf_line[0] as isize + offset) as usize); - } - st_y = false; + let line = sby * (4 << sb128) + 4 * sb128 as c_int + 2; + WithOffset { + data: &f.lf.lr_line_buf, + offset: f.lf.lr_lpf_line[0], + } + line as isize * y_stride + + (bx * 4) as isize + }; + Some(( + top, + WithOffset { + data: PicOrBuf::Buf(WithStride { + buf: buf.data, + stride: y_stride, + }), + offset: buf.offset, + }, + )) } else { - st_y = true; - } + None + }; - if st_y { - offset = have_tt as isize * (sby * 4) as isize * y_stride + let (top, bot) = top_bot.unwrap_or_else(|| { + let top = WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_line[tf as usize][0], + } + have_tt as isize * (sby * 4) as isize * y_stride + (bx * 4) as isize; - top = &*f.lf.cdef_line_buf.element_as( - (f.lf.cdef_line[tf as usize][0] as isize + offset) as usize, - ); let bottom = bptrs[0] + (8 * y_stride); - bot = bottom.as_ptr::(); - } + (top, WithOffset::pic(bottom)) + }); if y_pri_lvl != 0 { let adj_y_pri_lvl = adjust_strength(y_pri_lvl, variance); @@ -362,66 +368,68 @@ pub(crate) unsafe fn rav1d_cdef_brow( 0 }; for pl in 1..=2 { - let st_uv: bool; - if !have_tt { - st_uv = true; + let top_bot = if !have_tt { + None } else if sbrow_start && by == by_start { - if resize { - offset = ((sby - 1) * 4) as isize * uv_stride - + (bx * 4 >> ss_hor) as isize; - top = &*f.lf.cdef_line_buf.element_as( - (f.lf.cdef_lpf_line[pl] as isize + offset) as usize, - ); + let top = if resize { + WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_lpf_line[pl], + } + ((sby - 1) * 4) as isize * uv_stride + + (bx * 4 >> ss_hor) as isize } else { - let line_0 = sby * ((4 as c_int) << sb128) - 4; - offset = line_0 as isize * uv_stride - + (bx * 4 >> ss_hor) as isize; - top = &*f.lf.lr_line_buf.element_as( - (f.lf.lr_lpf_line[pl] as isize + offset) as usize, - ); - } + let line = sby * (4 << sb128) - 4; + WithOffset { + data: &f.lf.lr_line_buf, + offset: f.lf.lr_lpf_line[pl], + } + line as isize * uv_stride + + (bx * 4 >> ss_hor) as isize + }; let bottom = bptrs[pl] + ((8 >> ss_ver) * uv_stride); - bot = bottom.as_ptr::(); - st_uv = false; + Some((top, WithOffset::pic(bottom))) } else if !sbrow_start && by + 2 >= by_end { - let top_offset: ptrdiff_t = (sby * 8) as isize * uv_stride + let top = WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_line[tf as usize][pl], + } + (sby * 8) as isize * uv_stride + (bx * 4 >> ss_hor) as isize; - top = &*f.lf.cdef_line_buf.element_as( - (f.lf.cdef_line[tf as usize][pl] as isize + top_offset) - as usize, - ); - if resize { - offset = (sby * 4 + 2) as isize * uv_stride - + (bx * 4 >> ss_hor) as isize; - // FIXME incorrect; should be kept as an offset for later slices. - bot = &*f.lf.cdef_line_buf.element_as( - (f.lf.cdef_lpf_line[pl] as isize + offset) as usize, - ); + let buf = if resize { + WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_lpf_line[pl], + } + (sby * 4 + 2) as isize * uv_stride + + (bx * 4 >> ss_hor) as isize } else { - let line = - sby * ((4 as c_int) << sb128) + 4 * sb128 as c_int + 2; - offset = - line as isize * uv_stride + (bx * 4 >> ss_hor) as isize; - // FIXME incorrect; should be kept as an offset for later slices. - bot = &*f.lf.lr_line_buf.element_as( - (f.lf.lr_lpf_line[pl] as isize + offset) as usize, - ); - } - st_uv = false; + let line = sby * (4 << sb128) + 4 * sb128 as c_int + 2; + WithOffset { + data: &f.lf.lr_line_buf, + offset: f.lf.lr_lpf_line[pl], + } + line as isize * uv_stride + + (bx * 4 >> ss_hor) as isize + }; + Some(( + top, + WithOffset { + data: PicOrBuf::Buf(WithStride { + buf: buf.data, + stride: uv_stride, + }), + offset: buf.offset, + }, + )) } else { - st_uv = true; - } - - if st_uv { - let offset = have_tt as isize * (sby * 8) as isize * uv_stride + None + }; + + let (top, bot) = top_bot.unwrap_or_else(|| { + let top = WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_line[tf as usize][pl], + } + have_tt as isize * (sby * 8) as isize * uv_stride + (bx * 4 >> ss_hor) as isize; - top = &*f.lf.cdef_line_buf.element_as( - (f.lf.cdef_line[tf as usize][pl] as isize + offset) - as usize, - ); let bottom = bptrs[pl] + ((8 >> ss_ver) * uv_stride); - bot = bottom.as_ptr::(); - } + (top, WithOffset::pic(bottom)) + }); f.dsp.cdef.fb[uv_idx as usize].call::( bptrs[pl], diff --git a/src/internal.rs b/src/internal.rs index 595570659..fcbd7dd66 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -509,8 +509,7 @@ pub(crate) struct Rav1dFrameContext_bd_fn { pub filter_sbrow: filter_sbrow_fn, pub filter_sbrow_deblock_cols: filter_sbrow_fn, pub filter_sbrow_deblock_rows: filter_sbrow_fn, - pub filter_sbrow_cdef: - unsafe fn(&Rav1dContext, &Rav1dFrameData, &mut Rav1dTaskContext, c_int) -> (), + pub filter_sbrow_cdef: fn(&Rav1dContext, &Rav1dFrameData, &mut Rav1dTaskContext, c_int) -> (), pub filter_sbrow_resize: filter_sbrow_fn, pub filter_sbrow_lr: filter_sbrow_fn, pub backup_ipred_edge: backup_ipred_edge_fn, diff --git a/src/pixels.rs b/src/pixels.rs index 5b04e1d7a..5c17f6b65 100644 --- a/src/pixels.rs +++ b/src/pixels.rs @@ -24,7 +24,7 @@ pub trait Pixels { } /// Absolute ptr to [`BitDepth::Pixel`]s. - fn _as_ptr(&self) -> *const BD::Pixel { + fn as_ptr(&self) -> *const BD::Pixel { self.as_mut_ptr::().cast_const() } diff --git a/src/recon.rs b/src/recon.rs index f7c34e6de..f8c0c2707 100644 --- a/src/recon.rs +++ b/src/recon.rs @@ -3709,14 +3709,12 @@ pub(crate) fn rav1d_filter_sbrow_cdef( let ss_ver = f.cur.p.layout == Rav1dPixelLayout::I420 && i != 0; p[i] - ((8 * p[i].pixel_stride::()) >> ss_ver as u8) }); - // TODO make safe - unsafe { rav1d_cdef_brow::(c, tc, f, p_up, prev_mask, start - 2, start, true, sby) }; + rav1d_cdef_brow::(c, tc, f, p_up, prev_mask, start - 2, start, true, sby); } let n_blks = sbsz - 2 * ((sby + 1) < f.sbh) as c_int; let end = cmp::min(start + n_blks, f.bh); - // TODO make safe - unsafe { rav1d_cdef_brow::(c, tc, f, p, mask_offset, start, end, false, sby) }; + rav1d_cdef_brow::(c, tc, f, p, mask_offset, start, end, false, sby); } pub(crate) fn rav1d_filter_sbrow_resize( diff --git a/src/thread_task.rs b/src/thread_task.rs index 9bfac2cc4..a801ad671 100644 --- a/src/thread_task.rs +++ b/src/thread_task.rs @@ -1275,10 +1275,7 @@ pub fn rav1d_worker_task(task_thread: Arc) { let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); if seq_hdr.cdef != 0 { if fc.task_thread.error.load(Ordering::SeqCst) == 0 { - // SAFETY: TODO make safe - unsafe { - (f.bd_fn().filter_sbrow_cdef)(c, &f, &mut tc, sby); - } + (f.bd_fn().filter_sbrow_cdef)(c, &f, &mut tc, sby); } drop(f); reset_task_cur_async(ttd, t.frame_idx, c.fc.len() as u32);