diff --git a/src/looprestoration.rs b/src/looprestoration.rs index 04ac20f29..ca43c591b 100644 --- a/src/looprestoration.rs +++ b/src/looprestoration.rs @@ -1022,6 +1022,19 @@ type fn_dav1d_sgr_weighted1_neon = unsafe extern "C" fn( bitdepth_max: libc::c_int, ); +type fn_dav1d_sgr_weighted2_neon = unsafe extern "C" fn( + dst: *mut ::Pixel, + dst_stride: ptrdiff_t, + src: *const ::Pixel, + src_stride: ptrdiff_t, + t1: *const int16_t, + t2: *const int16_t, + w: libc::c_int, + h: libc::c_int, + wt: *const int16_t, + bitdepth_max: libc::c_int, +); + // TODO(randomPoison): Temporarily pub until all usages can be made private. #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] pub(crate) trait BitDepthLooprestorationArm: BitDepth { @@ -1030,6 +1043,7 @@ pub(crate) trait BitDepthLooprestorationArm: BitDepth { const dav1d_sgr_finish_filter1_neon: fn_dav1d_sgr_finish_filter_neon; const dav1d_sgr_finish_filter2_neon: fn_dav1d_sgr_finish_filter_neon; const dav1d_sgr_weighted1_neon: fn_dav1d_sgr_weighted1_neon; + const dav1d_sgr_weighted2_neon: fn_dav1d_sgr_weighted2_neon; } #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] @@ -1117,6 +1131,25 @@ impl BitDepthLooprestorationArm for BitDepth8 { dav1d_sgr_weighted1_8bpc_neon }; + + const dav1d_sgr_weighted2_neon: fn_dav1d_sgr_weighted2_neon = { + extern "C" { + fn dav1d_sgr_weighted2_8bpc_neon( + dst: *mut ::Pixel, + dst_stride: ptrdiff_t, + src: *const ::Pixel, + src_stride: ptrdiff_t, + t1: *const int16_t, + t2: *const int16_t, + w: libc::c_int, + h: libc::c_int, + wt: *const int16_t, + bitdepth_max: libc::c_int, + ); + } + + dav1d_sgr_weighted2_8bpc_neon + }; } #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] @@ -1204,6 +1237,25 @@ impl BitDepthLooprestorationArm for BitDepth16 { dav1d_sgr_weighted1_16bpc_neon }; + + const dav1d_sgr_weighted2_neon: fn_dav1d_sgr_weighted2_neon = { + extern "C" { + fn dav1d_sgr_weighted2_16bpc_neon( + dst: *mut ::Pixel, + dst_stride: ptrdiff_t, + src: *const ::Pixel, + src_stride: ptrdiff_t, + t1: *const int16_t, + t2: *const int16_t, + w: libc::c_int, + h: libc::c_int, + wt: *const int16_t, + bitdepth_max: libc::c_int, + ); + } + + dav1d_sgr_weighted2_16bpc_neon + }; } // TODO(randomPoison): Temporarily pub until callers are deduplicated. @@ -1318,7 +1370,7 @@ pub(crate) unsafe fn dav1d_sgr_filter2_neon( BD::dav1d_sgr_finish_filter2_neon(tmp, src, stride, a, b, w, h); } -// TODO: Temporarily pub until init logic is deduplicated. +// TODO(randomPoison): Temporarily pub until init logic is deduplicated. #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] pub(crate) unsafe extern "C" fn sgr_filter_5x5_neon_erased( p: *mut libc::c_void, @@ -1445,3 +1497,82 @@ unsafe fn sgr_filter_3x3_neon( bd.bitdepth_max().as_(), ); } + +// TODO(randomPoison): Temporarily pub until init logic is deduplicated. +#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] +pub(crate) unsafe extern "C" fn sgr_filter_mix_neon_erased( + p: *mut libc::c_void, + stride: ptrdiff_t, + left: *const libc::c_void, + lpf: *const libc::c_void, + w: libc::c_int, + h: libc::c_int, + params: *const LooprestorationParams, + edges: LrEdgeFlags, + bitdepth_max: libc::c_int, +) { + sgr_filter_mix_neon( + p.cast(), + stride, + left.cast(), + lpf.cast(), + w, + h, + params, + edges, + BD::from_c(bitdepth_max), + ) +} + +#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] +unsafe extern "C" fn sgr_filter_mix_neon( + dst: *mut BD::Pixel, + stride: ptrdiff_t, + left: *const [BD::Pixel; 4], + mut lpf: *const BD::Pixel, + w: libc::c_int, + h: libc::c_int, + params: *const LooprestorationParams, + edges: LrEdgeFlags, + bd: BD, +) { + let mut tmp1: Align16<[int16_t; 24576]> = Align16([0; 24576]); + let mut tmp2: Align16<[int16_t; 24576]> = Align16([0; 24576]); + dav1d_sgr_filter2_neon( + tmp1.0.as_mut_ptr(), + dst, + stride, + left, + lpf, + w, + h, + (*params).sgr.s0 as libc::c_int, + edges, + bd, + ); + dav1d_sgr_filter1_neon( + tmp2.0.as_mut_ptr(), + dst, + stride, + left, + lpf, + w, + h, + (*params).sgr.s1 as libc::c_int, + edges, + bd, + ); + let wt: [int16_t; 2] = [(*params).sgr.w0, (*params).sgr.w1]; + BD::dav1d_sgr_weighted2_neon( + dst, + stride, + dst, + stride, + tmp1.0.as_mut_ptr(), + tmp2.0.as_mut_ptr(), + w, + h, + wt.as_ptr(), + bd.bitdepth_max().as_(), + ); +} diff --git a/src/looprestoration_tmpl_16.rs b/src/looprestoration_tmpl_16.rs index 8cc482ec5..f05f2acf9 100644 --- a/src/looprestoration_tmpl_16.rs +++ b/src/looprestoration_tmpl_16.rs @@ -1,27 +1,12 @@ use crate::include::common::bitdepth::BitDepth16; use crate::include::stddef::*; use crate::include::stdint::*; -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64"),))] +#[cfg(all(feature = "asm", target_arch = "arm"))] use crate::src::align::Align16; use ::libc; #[cfg(feature = "asm")] use cfg_if::cfg_if; -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -extern "C" { - fn dav1d_sgr_weighted2_16bpc_neon( - dst: *mut pixel, - dst_stride: ptrdiff_t, - src: *const pixel, - src_stride: ptrdiff_t, - t1: *const int16_t, - t2: *const int16_t, - w: libc::c_int, - h: libc::c_int, - wt: *const int16_t, - bitdepth_max: libc::c_int, - ); -} #[cfg(all(feature = "asm", target_arch = "arm"))] extern "C" { fn dav1d_wiener_filter_h_16bpc_neon( @@ -52,7 +37,7 @@ pub type pixel = uint16_t; pub type coef = int32_t; pub type const_left_pixel_row = *const [pixel; 4]; -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] +#[cfg(all(feature = "asm", target_arch = "arm"))] #[rustfmt::skip] use crate::{ src::looprestoration::LrEdgeFlags, @@ -241,7 +226,6 @@ unsafe extern "C" fn loop_restoration_dsp_init_arm( ) { use crate::src::arm::cpu::DAV1D_ARM_CPU_FLAG_NEON; // TODO(randomPoison): Import temporarily needed until init fns are deduplicated. - #[cfg(target_arch = "aarch64")] use crate::src::looprestoration::*; let flags: libc::c_uint = dav1d_get_cpu_flags(); @@ -263,94 +247,10 @@ unsafe extern "C" fn loop_restoration_dsp_init_arm( if bpc == 10 { (*c).sgr[0] = sgr_filter_5x5_neon_erased::; (*c).sgr[1] = sgr_filter_3x3_neon_erased::; - (*c).sgr[2] = sgr_filter_mix_neon_erased; + (*c).sgr[2] = sgr_filter_mix_neon_erased::; } } - - -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -unsafe extern "C" fn sgr_filter_mix_neon_erased( - p: *mut libc::c_void, - stride: ptrdiff_t, - left: *const libc::c_void, - lpf: *const libc::c_void, - w: libc::c_int, - h: libc::c_int, - params: *const LooprestorationParams, - edges: LrEdgeFlags, - bitdepth_max: libc::c_int, -) { - sgr_filter_mix_neon( - p.cast(), - stride, - left.cast(), - lpf.cast(), - w, - h, - params, - edges, - bitdepth_max, - ) -} - -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -unsafe extern "C" fn sgr_filter_mix_neon( - dst: *mut pixel, - stride: ptrdiff_t, - left: *const [pixel; 4], - mut lpf: *const pixel, - w: libc::c_int, - h: libc::c_int, - params: *const LooprestorationParams, - edges: LrEdgeFlags, - bitdepth_max: libc::c_int, -) { - use crate::include::common::bitdepth::BitDepth; - use crate::src::looprestoration::dav1d_sgr_filter1_neon; - use crate::src::looprestoration::dav1d_sgr_filter2_neon; - - let mut tmp1: Align16<[int16_t; 24576]> = Align16([0; 24576]); - let mut tmp2: Align16<[int16_t; 24576]> = Align16([0; 24576]); - dav1d_sgr_filter2_neon( - tmp1.0.as_mut_ptr(), - dst, - stride, - left, - lpf, - w, - h, - (*params).sgr.s0 as libc::c_int, - edges, - BitDepth16::from_c(bitdepth_max), - ); - dav1d_sgr_filter1_neon( - tmp2.0.as_mut_ptr(), - dst, - stride, - left, - lpf, - w, - h, - (*params).sgr.s1 as libc::c_int, - edges, - BitDepth16::from_c(bitdepth_max), - ); - let wt: [int16_t; 2] = [(*params).sgr.w0, (*params).sgr.w1]; - dav1d_sgr_weighted2_16bpc_neon( - dst, - stride, - dst, - stride, - tmp1.0.as_mut_ptr(), - tmp2.0.as_mut_ptr(), - w, - h, - wt.as_ptr(), - bitdepth_max, - ); -} - #[no_mangle] #[cold] pub unsafe extern "C" fn dav1d_loop_restoration_dsp_init_16bpc( diff --git a/src/looprestoration_tmpl_8.rs b/src/looprestoration_tmpl_8.rs index 5b9c2a838..f59255899 100644 --- a/src/looprestoration_tmpl_8.rs +++ b/src/looprestoration_tmpl_8.rs @@ -1,26 +1,11 @@ use crate::include::common::bitdepth::BitDepth8; use crate::include::stdint::*; -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64"),))] +#[cfg(all(feature = "asm", target_arch = "arm"))] use crate::src::align::Align16; use ::libc; #[cfg(feature = "asm")] use cfg_if::cfg_if; -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -extern "C" { - fn dav1d_sgr_weighted2_8bpc_neon( - dst: *mut pixel, - dst_stride: ptrdiff_t, - src: *const pixel, - src_stride: ptrdiff_t, - t1: *const int16_t, - t2: *const int16_t, - w: libc::c_int, - h: libc::c_int, - wt: *const int16_t, - ); -} - #[cfg(all(feature = "asm", target_arch = "arm"))] extern "C" { fn dav1d_wiener_filter_h_8bpc_neon( @@ -54,7 +39,7 @@ use crate::src::looprestoration::sgr_mix_c_erased; use crate::src::looprestoration::wiener_c_erased; use crate::src::looprestoration::Dav1dLoopRestorationDSPContext; -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] +#[cfg(all(feature = "asm", target_arch = "arm"))] #[rustfmt::skip] use crate::{ include::stddef::ptrdiff_t, @@ -220,7 +205,6 @@ unsafe extern "C" fn loop_restoration_dsp_init_arm( ) { use crate::src::arm::cpu::DAV1D_ARM_CPU_FLAG_NEON; // TODO(randomPoison): Import temporarily needed until init fns are deduplicated. - #[cfg(target_arch = "aarch64")] use crate::src::looprestoration::*; let flags = dav1d_get_cpu_flags(); @@ -241,86 +225,7 @@ unsafe extern "C" fn loop_restoration_dsp_init_arm( (*c).sgr[0] = sgr_filter_5x5_neon_erased::; (*c).sgr[1] = sgr_filter_3x3_neon_erased::; - (*c).sgr[2] = sgr_filter_mix_neon_erased; -} - -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -unsafe extern "C" fn sgr_filter_mix_neon_erased( - p: *mut libc::c_void, - stride: ptrdiff_t, - left: *const libc::c_void, - lpf: *const libc::c_void, - w: libc::c_int, - h: libc::c_int, - params: *const LooprestorationParams, - edges: LrEdgeFlags, - _bitdepth_max: libc::c_int, -) { - sgr_filter_mix_neon( - p.cast(), - stride, - left.cast(), - lpf.cast(), - w, - h, - params, - edges, - ) -} - -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -unsafe extern "C" fn sgr_filter_mix_neon( - dst: *mut pixel, - stride: ptrdiff_t, - left: *const [pixel; 4], - mut lpf: *const pixel, - w: libc::c_int, - h: libc::c_int, - params: *const LooprestorationParams, - edges: LrEdgeFlags, -) { - use crate::include::common::bitdepth::BitDepth; - use crate::src::looprestoration::dav1d_sgr_filter1_neon; - use crate::src::looprestoration::dav1d_sgr_filter2_neon; - - let mut tmp1: Align16<[int16_t; 24576]> = Align16([0; 24576]); - let mut tmp2: Align16<[int16_t; 24576]> = Align16([0; 24576]); - dav1d_sgr_filter2_neon( - tmp1.0.as_mut_ptr(), - dst, - stride, - left, - lpf, - w, - h, - (*params).sgr.s0 as libc::c_int, - edges, - BitDepth8::new(()), - ); - dav1d_sgr_filter1_neon( - tmp2.0.as_mut_ptr(), - dst, - stride, - left, - lpf, - w, - h, - (*params).sgr.s1 as libc::c_int, - edges, - BitDepth8::new(()), - ); - let wt: [int16_t; 2] = [(*params).sgr.w0, (*params).sgr.w1]; - dav1d_sgr_weighted2_8bpc_neon( - dst, - stride, - dst, - stride, - tmp1.0.as_mut_ptr(), - tmp2.0.as_mut_ptr(), - w, - h, - wt.as_ptr(), - ); + (*c).sgr[2] = sgr_filter_mix_neon_erased::; } #[no_mangle]