# Patch summary (free-form preamble ahead of the `diff --git` header).
#
# Moves the three type-erased NEON entry points `sgr_filter_5x5_neon_erased`,
# `sgr_filter_3x3_neon_erased` and `sgr_filter_mix_neon_erased` out of the top level of
# src/looprestoration.rs into a new private module `neon_erased`. Their bodies are unchanged;
# they become `pub` inside the module, which pulls in the parent scope via `use super::*;`.
# The single call site (the `if matches!(BD::BPC, BPC::BPC8) || bpc == 10` branch of
# `impl Rav1dLoopRestorationDSPContext`) brings them back into scope with `use neon_erased::*;`.
#
# The `#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]` on
# `mod neon_erased` gates everything inside it, so none of the three functions carries a
# `#[cfg]` of its own.
#
# NOTE(review): this copy of the patch has its line breaks collapsed and appears to have lost
# text inside angle brackets (e.g. `FFISafe>>`, `neon_erased::)`) -- confirm against the
# original patch before applying. The `index` blob hash below was not regenerated.
diff --git a/src/looprestoration.rs b/src/looprestoration.rs index 33b4fe291..2bf6325f8 100644 --- a/src/looprestoration.rs +++ b/src/looprestoration.rs @@ -1581,35 +1581,6 @@ unsafe fn rav1d_sgr_weighted2_neon( ) } -/// # Safety -/// -/// Must be called by [`loop_restoration_filter::Fn::call`]. -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -unsafe extern "C" fn sgr_filter_5x5_neon_erased( - _p_ptr: *mut DynPixel, - _stride: ptrdiff_t, - left: *const LeftPixelRow, - lpf: *const DynPixel, - w: c_int, - h: c_int, - params: &LooprestorationParams, - edges: LrEdgeFlags, - bitdepth_max: c_int, - p: *const FFISafe, - _lpf: *const FFISafe>>, -) { - // SAFETY: Was passed as `FFISafe::new(_)` in `loop_restoration_filter::Fn::call`. - let p = *unsafe { FFISafe::get(p) }; - let left = left.cast(); - let lpf = lpf.cast(); - let bd = BD::from_c(bitdepth_max); - let w = w as usize; - let h = h as usize; - // SAFETY: Length sliced in `loop_restoration_filter::Fn::call`. - let left = unsafe { slice::from_raw_parts(left, h) }; - sgr_filter_5x5_neon(p, left, lpf, w, h, params, edges, bd) -} - #[cfg(all(feature = "asm", target_arch = "arm"))] unsafe fn sgr_filter_5x5_neon( dst: Rav1dPictureDataComponentOffset, @@ -1629,35 +1600,6 @@ unsafe fn sgr_filter_5x5_neon( rav1d_sgr_weighted1_neon(dst, dst, &mut tmp.0, w, h, sgr.w0, bd); } -/// # Safety -/// -/// Must be called by [`loop_restoration_filter::Fn::call`]. -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -unsafe extern "C" fn sgr_filter_3x3_neon_erased( - _p_ptr: *mut DynPixel, - _stride: ptrdiff_t, - left: *const LeftPixelRow, - lpf: *const DynPixel, - w: c_int, - h: c_int, - params: &LooprestorationParams, - edges: LrEdgeFlags, - bitdepth_max: c_int, - p: *const FFISafe, - _lpf: *const FFISafe>>, -) { - // SAFETY: Was passed as `FFISafe::new(_)` in `loop_restoration_filter::Fn::call`.
- let p = *unsafe { FFISafe::get(p) }; - let left = left.cast(); - let lpf = lpf.cast(); - let w = w as usize; - let h = h as usize; - let bd = BD::from_c(bitdepth_max); - // SAFETY: Length sliced in `loop_restoration_filter::Fn::call`. - let left = unsafe { slice::from_raw_parts(left, h) }; - sgr_filter_3x3_neon(p, left, lpf, w, h, params, edges, bd) -} - #[cfg(all(feature = "asm", target_arch = "arm"))] unsafe fn sgr_filter_3x3_neon( dst: Rav1dPictureDataComponentOffset, @@ -3444,35 +3386,6 @@ unsafe fn sgr_filter_mix_neon( } } -/// # Safety -/// -/// Must be called by [`loop_restoration_filter::Fn::call`]. -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -unsafe extern "C" fn sgr_filter_mix_neon_erased( - _p_ptr: *mut DynPixel, - _stride: ptrdiff_t, - left: *const LeftPixelRow, - lpf: *const DynPixel, - w: c_int, - h: c_int, - params: &LooprestorationParams, - edges: LrEdgeFlags, - bitdepth_max: c_int, - p: *const FFISafe, - _lpf: *const FFISafe>>, -) { - // SAFETY: Was passed as `FFISafe::new(_)` in `loop_restoration_filter::Fn::call`. - let p = *unsafe { FFISafe::get(p) }; - let left = left.cast(); - let lpf = lpf.cast(); - let bd = BD::from_c(bitdepth_max); - let w = w as usize; - let h = h as usize; - // SAFETY: Length sliced in `loop_restoration_filter::Fn::call`. - let left = unsafe { slice::from_raw_parts(left, h) }; - sgr_filter_mix_neon(p, left, lpf, w, h, params, edges, bd) -} - #[cfg(all(feature = "asm", target_arch = "arm"))] unsafe fn sgr_filter_mix_neon( dst: Rav1dPictureDataComponentOffset, @@ -3495,6 +3408,95 @@ unsafe fn sgr_filter_mix_neon( rav1d_sgr_weighted2_neon(dst, dst, &mut tmp1.0, &mut tmp2.0, w, h, &wt, bd); } +#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] +mod neon_erased { + use super::*; + + /// # Safety + /// + /// Must be called by [`loop_restoration_filter::Fn::call`].
+ pub unsafe extern "C" fn sgr_filter_5x5_neon_erased( + _p_ptr: *mut DynPixel, + _stride: ptrdiff_t, + left: *const LeftPixelRow, + lpf: *const DynPixel, + w: c_int, + h: c_int, + params: &LooprestorationParams, + edges: LrEdgeFlags, + bitdepth_max: c_int, + p: *const FFISafe, + _lpf: *const FFISafe>>, + ) { + // SAFETY: Was passed as `FFISafe::new(_)` in `loop_restoration_filter::Fn::call`. + let p = *unsafe { FFISafe::get(p) }; + let left = left.cast(); + let lpf = lpf.cast(); + let bd = BD::from_c(bitdepth_max); + let w = w as usize; + let h = h as usize; + // SAFETY: Length sliced in `loop_restoration_filter::Fn::call`. + let left = unsafe { slice::from_raw_parts(left, h) }; + sgr_filter_5x5_neon(p, left, lpf, w, h, params, edges, bd) + } + + /// # Safety + /// + /// Must be called by [`loop_restoration_filter::Fn::call`]. + pub unsafe extern "C" fn sgr_filter_3x3_neon_erased( + _p_ptr: *mut DynPixel, + _stride: ptrdiff_t, + left: *const LeftPixelRow, + lpf: *const DynPixel, + w: c_int, + h: c_int, + params: &LooprestorationParams, + edges: LrEdgeFlags, + bitdepth_max: c_int, + p: *const FFISafe, + _lpf: *const FFISafe>>, + ) { + // SAFETY: Was passed as `FFISafe::new(_)` in `loop_restoration_filter::Fn::call`. + let p = *unsafe { FFISafe::get(p) }; + let left = left.cast(); + let lpf = lpf.cast(); + let w = w as usize; + let h = h as usize; + let bd = BD::from_c(bitdepth_max); + // SAFETY: Length sliced in `loop_restoration_filter::Fn::call`. + let left = unsafe { slice::from_raw_parts(left, h) }; + sgr_filter_3x3_neon(p, left, lpf, w, h, params, edges, bd) + } + + /// # Safety + /// + /// Must be called by [`loop_restoration_filter::Fn::call`].
+ pub unsafe extern "C" fn sgr_filter_mix_neon_erased( + _p_ptr: *mut DynPixel, + _stride: ptrdiff_t, + left: *const LeftPixelRow, + lpf: *const DynPixel, + w: c_int, + h: c_int, + params: &LooprestorationParams, + edges: LrEdgeFlags, + bitdepth_max: c_int, + p: *const FFISafe, + _lpf: *const FFISafe>>, + ) { + // SAFETY: Was passed as `FFISafe::new(_)` in `loop_restoration_filter::Fn::call`. + let p = *unsafe { FFISafe::get(p) }; + let left = left.cast(); + let lpf = lpf.cast(); + let bd = BD::from_c(bitdepth_max); + let w = w as usize; + let h = h as usize; + // SAFETY: Length sliced in `loop_restoration_filter::Fn::call`. + let left = unsafe { slice::from_raw_parts(left, h) }; + sgr_filter_mix_neon(p, left, lpf, w, h, params, edges, bd) + } +} + impl Rav1dLoopRestorationDSPContext { pub const fn default() -> Self { Self { @@ -3610,6 +3612,8 @@ impl Rav1dLoopRestorationDSPContext { } if matches!(BD::BPC, BPC::BPC8) || bpc == 10 { + use neon_erased::*; + self.sgr[0] = loop_restoration_filter::Fn::new(sgr_filter_5x5_neon_erased::); self.sgr[1] = loop_restoration_filter::Fn::new(sgr_filter_3x3_neon_erased::); self.sgr[2] = loop_restoration_filter::Fn::new(sgr_filter_mix_neon_erased::);