From 51b9ae967449e28eb4069b6b3b106c9504634c14 Mon Sep 17 00:00:00 2001 From: Nicole LeGare Date: Mon, 31 Jul 2023 11:08:52 -0700 Subject: [PATCH 1/3] `fn sgr_filter_3x3`: Deduplicate w/ generics --- src/looprestoration.rs | 113 +++++++++++++++++++++++++++++++++ src/looprestoration_tmpl_16.rs | 67 +------------------ src/looprestoration_tmpl_8.rs | 65 +------------------ 3 files changed, 115 insertions(+), 130 deletions(-) diff --git a/src/looprestoration.rs b/src/looprestoration.rs index 8461dab22..90c6742a7 100644 --- a/src/looprestoration.rs +++ b/src/looprestoration.rs @@ -1010,6 +1010,18 @@ type fn_dav1d_sgr_finish_filter_neon = unsafe extern "C" fn( h: libc::c_int, ); +type fn_dav1d_sgr_weighted1_neon = unsafe extern "C" fn( + dst: *mut ::Pixel, + dst_stride: ptrdiff_t, + src: *const ::Pixel, + src_stride: ptrdiff_t, + t1: *const int16_t, + w: libc::c_int, + h: libc::c_int, + wt: libc::c_int, + bitdepth_max: libc::c_int, +); + // TODO(randomPoison): Temporarily pub until all usages can be made private. #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] pub(crate) trait BitDepthLooprestorationArm: BitDepth { @@ -1017,6 +1029,7 @@ pub(crate) trait BitDepthLooprestorationArm: BitDepth { const dav1d_sgr_box5_h_neon: fn_dav1d_sgr_box_h_neon; const dav1d_sgr_finish_filter1_neon: fn_dav1d_sgr_finish_filter_neon; const dav1d_sgr_finish_filter2_neon: fn_dav1d_sgr_finish_filter_neon; + const dav1d_sgr_weighted1_neon: fn_dav1d_sgr_weighted1_neon; } #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] @@ -1086,6 +1099,24 @@ impl BitDepthLooprestorationArm for BitDepth8 { dav1d_sgr_finish_filter2_8bpc_neon }; + + const dav1d_sgr_weighted1_neon: fn_dav1d_sgr_weighted1_neon = { + extern "C" { + fn dav1d_sgr_weighted1_8bpc_neon( + dst: *mut ::Pixel, + dst_stride: ptrdiff_t, + src: *const ::Pixel, + src_stride: ptrdiff_t, + t1: *const int16_t, + w: libc::c_int, + h: libc::c_int, + wt: libc::c_int, + bitdepth_max: libc::c_int, + ); + } + + dav1d_sgr_weighted1_8bpc_neon + }; } #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] @@ -1155,6 +1186,24 @@ impl BitDepthLooprestorationArm for BitDepth16 { dav1d_sgr_finish_filter2_16bpc_neon }; + + const dav1d_sgr_weighted1_neon: fn_dav1d_sgr_weighted1_neon = { + extern "C" { + fn dav1d_sgr_weighted1_16bpc_neon( + dst: *mut ::Pixel, + dst_stride: ptrdiff_t, + src: *const ::Pixel, + src_stride: ptrdiff_t, + t1: *const int16_t, + w: libc::c_int, + h: libc::c_int, + wt: libc::c_int, + bitdepth_max: libc::c_int, + ); + } + + dav1d_sgr_weighted1_16bpc_neon + }; } // TODO(randomPoison): Temporarily pub until callers are deduplicated. @@ -1268,3 +1317,67 @@ pub(crate) unsafe fn dav1d_sgr_filter2_neon( dav1d_sgr_calc_ab2_neon(a, b, w, h, strength, bd.bitdepth_max().as_()); BD::dav1d_sgr_finish_filter2_neon(tmp, src, stride, a, b, w, h); } + +// TODO(randomPoison): Temporarily pub until init logic is deduplicated. +#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] +pub(crate) unsafe extern "C" fn sgr_filter_3x3_neon_erased( + p: *mut libc::c_void, + stride: ptrdiff_t, + left: *const libc::c_void, + lpf: *const libc::c_void, + w: libc::c_int, + h: libc::c_int, + params: *const LooprestorationParams, + edges: LrEdgeFlags, + bitdepth_max: libc::c_int, +) { + sgr_filter_3x3_neon( + p.cast(), + stride, + left.cast(), + lpf.cast(), + w, + h, + params, + edges, + BD::from_c(bitdepth_max), + ) +} + +#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] +unsafe fn sgr_filter_3x3_neon( + dst: *mut BD::Pixel, + stride: ptrdiff_t, + left: *const [BD::Pixel; 4], + mut lpf: *const BD::Pixel, + w: libc::c_int, + h: libc::c_int, + params: *const LooprestorationParams, + edges: LrEdgeFlags, + bd: BD, +) { + let mut tmp: Align16<[int16_t; 24576]> = Align16([0; 24576]); + dav1d_sgr_filter1_neon( + tmp.0.as_mut_ptr(), + dst, + stride, + left, + lpf, + w, + h, + (*params).sgr.s1 as libc::c_int, + edges, + bd, + ); + BD::dav1d_sgr_weighted1_neon( + dst, + stride, + dst, + stride, + tmp.0.as_mut_ptr(), + w, + h, + (*params).sgr.w1 as libc::c_int, + bd.bitdepth_max().as_(), + ); +} diff --git a/src/looprestoration_tmpl_16.rs b/src/looprestoration_tmpl_16.rs index ff8b20b9e..0dc9bbdd1 100644 --- a/src/looprestoration_tmpl_16.rs +++ b/src/looprestoration_tmpl_16.rs @@ -273,76 +273,11 @@ unsafe extern "C" fn loop_restoration_dsp_init_arm( if bpc == 10 { (*c).sgr[0] = sgr_filter_5x5_neon_erased; - (*c).sgr[1] = sgr_filter_3x3_neon_erased; + (*c).sgr[1] = sgr_filter_3x3_neon_erased::; (*c).sgr[2] = sgr_filter_mix_neon_erased; } } -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -unsafe extern "C" fn sgr_filter_3x3_neon_erased( - p: *mut libc::c_void, - stride: ptrdiff_t, - left: *const libc::c_void, - lpf: *const libc::c_void, - w: libc::c_int, - h: libc::c_int, - params: *const LooprestorationParams, - edges: LrEdgeFlags, - bitdepth_max: libc::c_int, -) { - sgr_filter_3x3_neon( - p.cast(), - stride, - left.cast(), - lpf.cast(), - w, - h, - params, - edges, - bitdepth_max, - ) -} - -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -unsafe extern "C" fn sgr_filter_3x3_neon( - dst: *mut pixel, - stride: ptrdiff_t, - left: *const [pixel; 4], - mut lpf: *const pixel, - w: libc::c_int, - h: libc::c_int, - params: *const LooprestorationParams, - edges: LrEdgeFlags, - bitdepth_max: libc::c_int, -) { - use crate::include::common::bitdepth::BitDepth; - use crate::src::looprestoration::dav1d_sgr_filter1_neon; - - let mut tmp: Align16<[int16_t; 24576]> = Align16([0; 24576]); - dav1d_sgr_filter1_neon( - tmp.0.as_mut_ptr(), - dst, - stride, - left, - lpf, - w, - h, - (*params).sgr.s1 as libc::c_int, - edges, - BitDepth16::from_c(bitdepth_max), - ); - dav1d_sgr_weighted1_16bpc_neon( - dst, - stride, - dst, - stride, - tmp.0.as_mut_ptr(), - w, - h, - (*params).sgr.w1 as libc::c_int, - bitdepth_max, - ); -} #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] unsafe extern "C" fn sgr_filter_5x5_neon_erased( diff --git a/src/looprestoration_tmpl_8.rs b/src/looprestoration_tmpl_8.rs index a7ee7bb8a..9d15e2d1b 100644 --- a/src/looprestoration_tmpl_8.rs +++ b/src/looprestoration_tmpl_8.rs @@ -250,7 +250,7 @@ unsafe extern "C" fn loop_restoration_dsp_init_arm( } (*c).sgr[0] = sgr_filter_5x5_neon_erased; - (*c).sgr[1] = sgr_filter_3x3_neon_erased; + (*c).sgr[1] = sgr_filter_3x3_neon_erased::; (*c).sgr[2] = sgr_filter_mix_neon_erased; } @@ -317,69 +317,6 @@ unsafe extern "C" fn sgr_filter_5x5_neon( ); } -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -unsafe extern "C" fn sgr_filter_3x3_neon_erased( - p: *mut libc::c_void, - stride: ptrdiff_t, - left: *const libc::c_void, - lpf: *const libc::c_void, - w: libc::c_int, - h: libc::c_int, - params: *const LooprestorationParams, - edges: LrEdgeFlags, - _bitdepth_max: libc::c_int, -) { - sgr_filter_3x3_neon( - p.cast(), - stride, - left.cast(), - lpf.cast(), - w, - h, - params, - edges, - ) -} - -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -unsafe extern "C" fn sgr_filter_3x3_neon( - dst: *mut pixel, - stride: ptrdiff_t, - left: *const [pixel; 4], - mut lpf: *const pixel, - w: libc::c_int, - h: libc::c_int, - params: *const LooprestorationParams, - edges: LrEdgeFlags, -) { - use crate::include::common::bitdepth::BitDepth; - use crate::src::looprestoration::dav1d_sgr_filter1_neon; - - let mut tmp: Align16<[int16_t; 24576]> = Align16([0; 24576]); - dav1d_sgr_filter1_neon( - tmp.0.as_mut_ptr(), - dst, - stride, - left, - lpf, - w, - h, - (*params).sgr.s1 as libc::c_int, - edges, - BitDepth8::new(()), - ); - dav1d_sgr_weighted1_8bpc_neon( - dst, - stride, - dst, - stride, - tmp.0.as_mut_ptr(), - w, - h, - (*params).sgr.w1 as libc::c_int, - ); -} - #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] unsafe extern "C" fn sgr_filter_mix_neon_erased( p: *mut libc::c_void, From 11801a2462f7e31c2ad9ec918213b992469bee33 Mon Sep 17 00:00:00 2001 From: Nicole LeGare Date: Mon, 31 Jul 2023 11:16:51 -0700 Subject: [PATCH 2/3] `fn sgr_filter_3x3`: Deduplicate w/ generics --- src/looprestoration.rs | 64 ++++++++++++++++++++++++++++ src/looprestoration_tmpl_16.rs | 78 +--------------------------------- src/looprestoration_tmpl_8.rs | 75 +------------------------------- 3 files changed, 66 insertions(+), 151 deletions(-) diff --git a/src/looprestoration.rs b/src/looprestoration.rs index 90c6742a7..04ac20f29 100644 --- a/src/looprestoration.rs +++ b/src/looprestoration.rs @@ -1318,6 +1318,70 @@ pub(crate) unsafe fn dav1d_sgr_filter2_neon( BD::dav1d_sgr_finish_filter2_neon(tmp, src, stride, a, b, w, h); } +// TODO: Temporarily pub until init logic is deduplicated. +#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] +pub(crate) unsafe extern "C" fn sgr_filter_5x5_neon_erased( + p: *mut libc::c_void, + stride: ptrdiff_t, + left: *const libc::c_void, + lpf: *const libc::c_void, + w: libc::c_int, + h: libc::c_int, + params: *const LooprestorationParams, + edges: LrEdgeFlags, + bitdepth_max: libc::c_int, +) { + sgr_filter_5x5_neon( + p.cast(), + stride, + left.cast(), + lpf.cast(), + w, + h, + params, + edges, + BD::from_c(bitdepth_max), + ) +} + +#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] +unsafe fn sgr_filter_5x5_neon( + dst: *mut BD::Pixel, + stride: ptrdiff_t, + left: *const [BD::Pixel; 4], + mut lpf: *const BD::Pixel, + w: libc::c_int, + h: libc::c_int, + params: *const LooprestorationParams, + edges: LrEdgeFlags, + bd: BD, +) { + let mut tmp: Align16<[int16_t; 24576]> = Align16([0; 24576]); + dav1d_sgr_filter2_neon( + tmp.0.as_mut_ptr(), + dst, + stride, + left, + lpf, + w, + h, + (*params).sgr.s0 as libc::c_int, + edges, + bd, + ); + BD::dav1d_sgr_weighted1_neon( + dst, + stride, + dst, + stride, + tmp.0.as_mut_ptr(), + w, + h, + (*params).sgr.w0 as libc::c_int, + bd.bitdepth_max().as_(), + ); +} + // TODO(randomPoison): Temporarily pub until init logic is deduplicated. #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] pub(crate) unsafe extern "C" fn sgr_filter_3x3_neon_erased( diff --git a/src/looprestoration_tmpl_16.rs b/src/looprestoration_tmpl_16.rs index 0dc9bbdd1..8cc482ec5 100644 --- a/src/looprestoration_tmpl_16.rs +++ b/src/looprestoration_tmpl_16.rs @@ -21,17 +21,6 @@ extern "C" { wt: *const int16_t, bitdepth_max: libc::c_int, ); - fn dav1d_sgr_weighted1_16bpc_neon( - dst: *mut pixel, - dst_stride: ptrdiff_t, - src: *const pixel, - src_stride: ptrdiff_t, - t1: *const int16_t, - w: libc::c_int, - h: libc::c_int, - wt: libc::c_int, - bitdepth_max: libc::c_int, - ); } #[cfg(all(feature = "asm", target_arch = "arm"))] extern "C" { @@ -272,78 +261,13 @@ unsafe extern "C" fn loop_restoration_dsp_init_arm( } if bpc == 10 { - (*c).sgr[0] = sgr_filter_5x5_neon_erased; + (*c).sgr[0] = sgr_filter_5x5_neon_erased::; (*c).sgr[1] = sgr_filter_3x3_neon_erased::; (*c).sgr[2] = sgr_filter_mix_neon_erased; } } -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -unsafe extern "C" fn sgr_filter_5x5_neon_erased( - p: *mut libc::c_void, - stride: ptrdiff_t, - left: *const libc::c_void, - lpf: *const libc::c_void, - w: libc::c_int, - h: libc::c_int, - params: *const LooprestorationParams, - edges: LrEdgeFlags, - bitdepth_max: libc::c_int, -) { - sgr_filter_5x5_neon( - p.cast(), - stride, - left.cast(), - lpf.cast(), - w, - h, - params, - edges, - bitdepth_max, - ) -} - -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -unsafe extern "C" fn sgr_filter_5x5_neon( - dst: *mut pixel, - stride: ptrdiff_t, - left: *const [pixel; 4], - mut lpf: *const pixel, - w: libc::c_int, - h: libc::c_int, - params: *const LooprestorationParams, - edges: LrEdgeFlags, - bitdepth_max: libc::c_int, -) { - use crate::include::common::bitdepth::BitDepth; - use crate::src::looprestoration::dav1d_sgr_filter2_neon; - - let mut tmp: Align16<[int16_t; 24576]> = Align16([0; 24576]); - dav1d_sgr_filter2_neon( - tmp.0.as_mut_ptr(), - dst, - stride, - left, - lpf, - w, - h, - (*params).sgr.s0 as libc::c_int, - edges, - BitDepth16::from_c(bitdepth_max), - ); - dav1d_sgr_weighted1_16bpc_neon( - dst, - stride, - dst, - stride, - tmp.0.as_mut_ptr(), - w, - h, - (*params).sgr.w0 as libc::c_int, - bitdepth_max, - ); -} #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] unsafe extern "C" fn sgr_filter_mix_neon_erased( diff --git a/src/looprestoration_tmpl_8.rs b/src/looprestoration_tmpl_8.rs index 9d15e2d1b..5b9c2a838 100644 --- a/src/looprestoration_tmpl_8.rs +++ b/src/looprestoration_tmpl_8.rs @@ -8,16 +8,6 @@ use cfg_if::cfg_if; #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] extern "C" { - fn dav1d_sgr_weighted1_8bpc_neon( - dst: *mut pixel, - dst_stride: ptrdiff_t, - src: *const pixel, - src_stride: ptrdiff_t, - t1: *const int16_t, - w: libc::c_int, - h: libc::c_int, - wt: libc::c_int, - ); fn dav1d_sgr_weighted2_8bpc_neon( dst: *mut pixel, dst_stride: ptrdiff_t, @@ -249,74 +239,11 @@ unsafe extern "C" fn loop_restoration_dsp_init_arm( } } - (*c).sgr[0] = sgr_filter_5x5_neon_erased; + (*c).sgr[0] = sgr_filter_5x5_neon_erased::; (*c).sgr[1] = sgr_filter_3x3_neon_erased::; (*c).sgr[2] = sgr_filter_mix_neon_erased; } -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -unsafe extern "C" fn sgr_filter_5x5_neon_erased( - p: *mut libc::c_void, - stride: ptrdiff_t, - left: *const libc::c_void, - lpf: *const libc::c_void, - w: libc::c_int, - h: libc::c_int, - params: *const LooprestorationParams, - edges: LrEdgeFlags, - _bitdepth_max: libc::c_int, -) { - sgr_filter_5x5_neon( - p.cast(), - stride, - left.cast(), - lpf.cast(), - w, - h, - params, - edges, - ) -} - -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -unsafe extern "C" fn sgr_filter_5x5_neon( - dst: *mut pixel, - stride: ptrdiff_t, - left: *const [pixel; 4], - mut lpf: *const pixel, - w: libc::c_int, - h: libc::c_int, - params: *const LooprestorationParams, - edges: LrEdgeFlags, -) { - use crate::include::common::bitdepth::BitDepth; - use crate::src::looprestoration::dav1d_sgr_filter2_neon; - - let mut tmp: Align16<[int16_t; 24576]> = Align16([0; 24576]); - dav1d_sgr_filter2_neon( - tmp.0.as_mut_ptr(), - dst, - stride, - left, - lpf, - w, - h, - (*params).sgr.s0 as libc::c_int, - edges, - BitDepth8::new(()), - ); - dav1d_sgr_weighted1_8bpc_neon( - dst, - stride, - dst, - stride, - tmp.0.as_mut_ptr(), - w, - h, - (*params).sgr.w0 as libc::c_int, - ); -} - #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] unsafe extern "C" fn sgr_filter_mix_neon_erased( p: *mut libc::c_void, From 0efc6347c571411bf8a8f47ae4502411159f1ac6 Mon Sep 17 00:00:00 2001 From: Nicole LeGare Date: Mon, 31 Jul 2023 11:33:06 -0700 Subject: [PATCH 3/3] `fn sgr_filter_mix`: Deduplicate w/ generics --- src/looprestoration.rs | 133 ++++++++++++++++++++++++++++++++- src/looprestoration_tmpl_16.rs | 106 +------------------------- src/looprestoration_tmpl_8.rs | 101 +------------------------ 3 files changed, 138 insertions(+), 202 deletions(-) diff --git a/src/looprestoration.rs b/src/looprestoration.rs index 04ac20f29..ca43c591b 100644 --- a/src/looprestoration.rs +++ b/src/looprestoration.rs @@ -1022,6 +1022,19 @@ type fn_dav1d_sgr_weighted1_neon = unsafe extern "C" fn( bitdepth_max: libc::c_int, ); +type fn_dav1d_sgr_weighted2_neon = unsafe extern "C" fn( + dst: *mut ::Pixel, + dst_stride: ptrdiff_t, + src: *const ::Pixel, + src_stride: ptrdiff_t, + t1: *const int16_t, + t2: *const int16_t, + w: libc::c_int, + h: libc::c_int, + wt: *const int16_t, + bitdepth_max: libc::c_int, +); + // TODO(randomPoison): Temporarily pub until all usages can be made private. #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] pub(crate) trait BitDepthLooprestorationArm: BitDepth { @@ -1030,6 +1043,7 @@ pub(crate) trait BitDepthLooprestorationArm: BitDepth { const dav1d_sgr_finish_filter1_neon: fn_dav1d_sgr_finish_filter_neon; const dav1d_sgr_finish_filter2_neon: fn_dav1d_sgr_finish_filter_neon; const dav1d_sgr_weighted1_neon: fn_dav1d_sgr_weighted1_neon; + const dav1d_sgr_weighted2_neon: fn_dav1d_sgr_weighted2_neon; } #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] @@ -1117,6 +1131,25 @@ impl BitDepthLooprestorationArm for BitDepth8 { dav1d_sgr_weighted1_8bpc_neon }; + + const dav1d_sgr_weighted2_neon: fn_dav1d_sgr_weighted2_neon = { + extern "C" { + fn dav1d_sgr_weighted2_8bpc_neon( + dst: *mut ::Pixel, + dst_stride: ptrdiff_t, + src: *const ::Pixel, + src_stride: ptrdiff_t, + t1: *const int16_t, + t2: *const int16_t, + w: libc::c_int, + h: libc::c_int, + wt: *const int16_t, + bitdepth_max: libc::c_int, + ); + } + + dav1d_sgr_weighted2_8bpc_neon + }; } #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] @@ -1204,6 +1237,25 @@ impl BitDepthLooprestorationArm for BitDepth16 { dav1d_sgr_weighted1_16bpc_neon }; + + const dav1d_sgr_weighted2_neon: fn_dav1d_sgr_weighted2_neon = { + extern "C" { + fn dav1d_sgr_weighted2_16bpc_neon( + dst: *mut ::Pixel, + dst_stride: ptrdiff_t, + src: *const ::Pixel, + src_stride: ptrdiff_t, + t1: *const int16_t, + t2: *const int16_t, + w: libc::c_int, + h: libc::c_int, + wt: *const int16_t, + bitdepth_max: libc::c_int, + ); + } + + dav1d_sgr_weighted2_16bpc_neon + }; } // TODO(randomPoison): Temporarily pub until callers are deduplicated. @@ -1318,7 +1370,7 @@ pub(crate) unsafe fn dav1d_sgr_filter2_neon( BD::dav1d_sgr_finish_filter2_neon(tmp, src, stride, a, b, w, h); } -// TODO: Temporarily pub until init logic is deduplicated. +// TODO(randomPoison): Temporarily pub until init logic is deduplicated. #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] pub(crate) unsafe extern "C" fn sgr_filter_5x5_neon_erased( p: *mut libc::c_void, @@ -1445,3 +1497,82 @@ unsafe fn sgr_filter_3x3_neon( bd.bitdepth_max().as_(), ); } + +// TODO(randomPoison): Temporarily pub until init logic is deduplicated. +#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] +pub(crate) unsafe extern "C" fn sgr_filter_mix_neon_erased( + p: *mut libc::c_void, + stride: ptrdiff_t, + left: *const libc::c_void, + lpf: *const libc::c_void, + w: libc::c_int, + h: libc::c_int, + params: *const LooprestorationParams, + edges: LrEdgeFlags, + bitdepth_max: libc::c_int, +) { + sgr_filter_mix_neon( + p.cast(), + stride, + left.cast(), + lpf.cast(), + w, + h, + params, + edges, + BD::from_c(bitdepth_max), + ) +} + +#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] +unsafe extern "C" fn sgr_filter_mix_neon( + dst: *mut BD::Pixel, + stride: ptrdiff_t, + left: *const [BD::Pixel; 4], + mut lpf: *const BD::Pixel, + w: libc::c_int, + h: libc::c_int, + params: *const LooprestorationParams, + edges: LrEdgeFlags, + bd: BD, +) { + let mut tmp1: Align16<[int16_t; 24576]> = Align16([0; 24576]); + let mut tmp2: Align16<[int16_t; 24576]> = Align16([0; 24576]); + dav1d_sgr_filter2_neon( + tmp1.0.as_mut_ptr(), + dst, + stride, + left, + lpf, + w, + h, + (*params).sgr.s0 as libc::c_int, + edges, + bd, + ); + dav1d_sgr_filter1_neon( + tmp2.0.as_mut_ptr(), + dst, + stride, + left, + lpf, + w, + h, + (*params).sgr.s1 as libc::c_int, + edges, + bd, + ); + let wt: [int16_t; 2] = [(*params).sgr.w0, (*params).sgr.w1]; + BD::dav1d_sgr_weighted2_neon( + dst, + stride, + dst, + stride, + tmp1.0.as_mut_ptr(), + tmp2.0.as_mut_ptr(), + w, + h, + wt.as_ptr(), + bd.bitdepth_max().as_(), + ); +} diff --git a/src/looprestoration_tmpl_16.rs b/src/looprestoration_tmpl_16.rs index 8cc482ec5..f05f2acf9 100644 --- a/src/looprestoration_tmpl_16.rs +++ b/src/looprestoration_tmpl_16.rs @@ -1,27 +1,12 @@ use crate::include::common::bitdepth::BitDepth16; use crate::include::stddef::*; use crate::include::stdint::*; -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64"),))] +#[cfg(all(feature = "asm", target_arch = "arm"))] use crate::src::align::Align16; use ::libc; #[cfg(feature = "asm")] use cfg_if::cfg_if; -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -extern "C" { - fn dav1d_sgr_weighted2_16bpc_neon( - dst: *mut pixel, - dst_stride: ptrdiff_t, - src: *const pixel, - src_stride: ptrdiff_t, - t1: *const int16_t, - t2: *const int16_t, - w: libc::c_int, - h: libc::c_int, - wt: *const int16_t, - bitdepth_max: libc::c_int, - ); -} #[cfg(all(feature = "asm", target_arch = "arm"))] extern "C" { fn dav1d_wiener_filter_h_16bpc_neon( @@ -52,7 +37,7 @@ pub type pixel = uint16_t; pub type coef = int32_t; pub type const_left_pixel_row = *const [pixel; 4]; -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] +#[cfg(all(feature = "asm", target_arch = "arm"))] #[rustfmt::skip] use crate::{ src::looprestoration::LrEdgeFlags, @@ -241,7 +226,6 @@ unsafe extern "C" fn loop_restoration_dsp_init_arm( ) { use crate::src::arm::cpu::DAV1D_ARM_CPU_FLAG_NEON; // TODO(randomPoison): Import temporarily needed until init fns are deduplicated. - #[cfg(target_arch = "aarch64")] use crate::src::looprestoration::*; let flags: libc::c_uint = dav1d_get_cpu_flags(); @@ -263,94 +247,10 @@ unsafe extern "C" fn loop_restoration_dsp_init_arm( if bpc == 10 { (*c).sgr[0] = sgr_filter_5x5_neon_erased::; (*c).sgr[1] = sgr_filter_3x3_neon_erased::; - (*c).sgr[2] = sgr_filter_mix_neon_erased; + (*c).sgr[2] = sgr_filter_mix_neon_erased::; } } - - -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -unsafe extern "C" fn sgr_filter_mix_neon_erased( - p: *mut libc::c_void, - stride: ptrdiff_t, - left: *const libc::c_void, - lpf: *const libc::c_void, - w: libc::c_int, - h: libc::c_int, - params: *const LooprestorationParams, - edges: LrEdgeFlags, - bitdepth_max: libc::c_int, -) { - sgr_filter_mix_neon( - p.cast(), - stride, - left.cast(), - lpf.cast(), - w, - h, - params, - edges, - bitdepth_max, - ) -} - -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -unsafe extern "C" fn sgr_filter_mix_neon( - dst: *mut pixel, - stride: ptrdiff_t, - left: *const [pixel; 4], - mut lpf: *const pixel, - w: libc::c_int, - h: libc::c_int, - params: *const LooprestorationParams, - edges: LrEdgeFlags, - bitdepth_max: libc::c_int, -) { - use crate::include::common::bitdepth::BitDepth; - use crate::src::looprestoration::dav1d_sgr_filter1_neon; - use crate::src::looprestoration::dav1d_sgr_filter2_neon; - - let mut tmp1: Align16<[int16_t; 24576]> = Align16([0; 24576]); - let mut tmp2: Align16<[int16_t; 24576]> = Align16([0; 24576]); - dav1d_sgr_filter2_neon( - tmp1.0.as_mut_ptr(), - dst, - stride, - left, - lpf, - w, - h, - (*params).sgr.s0 as libc::c_int, - edges, - BitDepth16::from_c(bitdepth_max), - ); - dav1d_sgr_filter1_neon( - tmp2.0.as_mut_ptr(), - dst, - stride, - left, - lpf, - w, - h, - (*params).sgr.s1 as libc::c_int, - edges, - BitDepth16::from_c(bitdepth_max), - ); - let wt: [int16_t; 2] = [(*params).sgr.w0, (*params).sgr.w1]; - dav1d_sgr_weighted2_16bpc_neon( - dst, - stride, - dst, - stride, - tmp1.0.as_mut_ptr(), - tmp2.0.as_mut_ptr(), - w, - h, - wt.as_ptr(), - bitdepth_max, - ); -} - #[no_mangle] #[cold] pub unsafe extern "C" fn dav1d_loop_restoration_dsp_init_16bpc( diff --git a/src/looprestoration_tmpl_8.rs b/src/looprestoration_tmpl_8.rs index 5b9c2a838..f59255899 100644 --- a/src/looprestoration_tmpl_8.rs +++ b/src/looprestoration_tmpl_8.rs @@ -1,26 +1,11 @@ use crate::include::common::bitdepth::BitDepth8; use crate::include::stdint::*; -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64"),))] +#[cfg(all(feature = "asm", target_arch = "arm"))] use crate::src::align::Align16; use ::libc; #[cfg(feature = "asm")] use cfg_if::cfg_if; -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -extern "C" { - fn dav1d_sgr_weighted2_8bpc_neon( - dst: *mut pixel, - dst_stride: ptrdiff_t, - src: *const pixel, - src_stride: ptrdiff_t, - t1: *const int16_t, - t2: *const int16_t, - w: libc::c_int, - h: libc::c_int, - wt: *const int16_t, - ); -} - #[cfg(all(feature = "asm", target_arch = "arm"))] extern "C" { fn dav1d_wiener_filter_h_8bpc_neon( @@ -54,7 +39,7 @@ use crate::src::looprestoration::sgr_mix_c_erased; use crate::src::looprestoration::wiener_c_erased; use crate::src::looprestoration::Dav1dLoopRestorationDSPContext; -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] +#[cfg(all(feature = "asm", target_arch = "arm"))] #[rustfmt::skip] use crate::{ include::stddef::ptrdiff_t, @@ -220,7 +205,6 @@ unsafe extern "C" fn loop_restoration_dsp_init_arm( ) { use crate::src::arm::cpu::DAV1D_ARM_CPU_FLAG_NEON; // TODO(randomPoison): Import temporarily needed until init fns are deduplicated. - #[cfg(target_arch = "aarch64")] use crate::src::looprestoration::*; let flags = dav1d_get_cpu_flags(); @@ -241,86 +225,7 @@ unsafe extern "C" fn loop_restoration_dsp_init_arm( (*c).sgr[0] = sgr_filter_5x5_neon_erased::; (*c).sgr[1] = sgr_filter_3x3_neon_erased::; - (*c).sgr[2] = sgr_filter_mix_neon_erased; -} - -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -unsafe extern "C" fn sgr_filter_mix_neon_erased( - p: *mut libc::c_void, - stride: ptrdiff_t, - left: *const libc::c_void, - lpf: *const libc::c_void, - w: libc::c_int, - h: libc::c_int, - params: *const LooprestorationParams, - edges: LrEdgeFlags, - _bitdepth_max: libc::c_int, -) { - sgr_filter_mix_neon( - p.cast(), - stride, - left.cast(), - lpf.cast(), - w, - h, - params, - edges, - ) -} - -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -unsafe extern "C" fn sgr_filter_mix_neon( - dst: *mut pixel, - stride: ptrdiff_t, - left: *const [pixel; 4], - mut lpf: *const pixel, - w: libc::c_int, - h: libc::c_int, - params: *const LooprestorationParams, - edges: LrEdgeFlags, -) { - use crate::include::common::bitdepth::BitDepth; - use crate::src::looprestoration::dav1d_sgr_filter1_neon; - use crate::src::looprestoration::dav1d_sgr_filter2_neon; - - let mut tmp1: Align16<[int16_t; 24576]> = Align16([0; 24576]); - let mut tmp2: Align16<[int16_t; 24576]> = Align16([0; 24576]); - dav1d_sgr_filter2_neon( - tmp1.0.as_mut_ptr(), - dst, - stride, - left, - lpf, - w, - h, - (*params).sgr.s0 as libc::c_int, - edges, - BitDepth8::new(()), - ); - dav1d_sgr_filter1_neon( - tmp2.0.as_mut_ptr(), - dst, - stride, - left, - lpf, - w, - h, - (*params).sgr.s1 as libc::c_int, - edges, - BitDepth8::new(()), - ); - let wt: [int16_t; 2] = [(*params).sgr.w0, (*params).sgr.w1]; - dav1d_sgr_weighted2_8bpc_neon( - dst, - stride, - dst, - stride, - tmp1.0.as_mut_ptr(), - tmp2.0.as_mut_ptr(), - w, - h, - wt.as_ptr(), - ); + (*c).sgr[2] = sgr_filter_mix_neon_erased::; } #[no_mangle]