Skip to content

Commit

Permalink
fn sgr_filter_mix: Deduplicate w/ generics
Browse files Browse the repository at this point in the history
  • Loading branch information
randomPoison authored and kkysen committed Aug 3, 2023
1 parent 11801a2 commit 0efc634
Show file tree
Hide file tree
Showing 3 changed files with 138 additions and 202 deletions.
133 changes: 132 additions & 1 deletion src/looprestoration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1022,6 +1022,19 @@ type fn_dav1d_sgr_weighted1_neon<BD> = unsafe extern "C" fn(
bitdepth_max: libc::c_int,
);

/// Function-pointer type shared by the external `dav1d_sgr_weighted2_*_neon`
/// routines, parameterised over the bit depth `BD`.
///
/// The routine receives a destination and a source pixel pointer (each with
/// its own stride), two `int16_t` buffers (`t1`, `t2`), the dimensions `w`
/// and `h`, a pointer `wt` to `int16_t` weights, and `bitdepth_max`.
// NOTE(review): the precise meaning of each argument (stride units, buffer
// layout, number of weights read through `wt`) is defined by the assembly
// implementation, which is not visible here -- confirm against it.
type fn_dav1d_sgr_weighted2_neon<BD> = unsafe extern "C" fn(
dst: *mut <BD as BitDepth>::Pixel,
dst_stride: ptrdiff_t,
src: *const <BD as BitDepth>::Pixel,
src_stride: ptrdiff_t,
t1: *const int16_t,
t2: *const int16_t,
w: libc::c_int,
h: libc::c_int,
wt: *const int16_t,
bitdepth_max: libc::c_int,
);

// TODO(randomPoison): Temporarily pub until all usages can be made private.
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
pub(crate) trait BitDepthLooprestorationArm: BitDepth {
Expand All @@ -1030,6 +1043,7 @@ pub(crate) trait BitDepthLooprestorationArm: BitDepth {
const dav1d_sgr_finish_filter1_neon: fn_dav1d_sgr_finish_filter_neon<Self>;
const dav1d_sgr_finish_filter2_neon: fn_dav1d_sgr_finish_filter_neon<Self>;
const dav1d_sgr_weighted1_neon: fn_dav1d_sgr_weighted1_neon<Self>;
const dav1d_sgr_weighted2_neon: fn_dav1d_sgr_weighted2_neon<Self>;
}

#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
Expand Down Expand Up @@ -1117,6 +1131,25 @@ impl BitDepthLooprestorationArm for BitDepth8 {

dav1d_sgr_weighted1_8bpc_neon
};

/// Binds the trait constant to the external `dav1d_sgr_weighted2_8bpc_neon`
/// symbol for `BitDepth8`.
const dav1d_sgr_weighted2_neon: fn_dav1d_sgr_weighted2_neon<Self> = {
// The foreign function is declared inside the initializer block, so the
// symbol name is only visible here and is reached through this constant.
extern "C" {
fn dav1d_sgr_weighted2_8bpc_neon(
dst: *mut <BitDepth8 as BitDepth>::Pixel,
dst_stride: ptrdiff_t,
src: *const <BitDepth8 as BitDepth>::Pixel,
src_stride: ptrdiff_t,
t1: *const int16_t,
t2: *const int16_t,
w: libc::c_int,
h: libc::c_int,
wt: *const int16_t,
bitdepth_max: libc::c_int,
);
}

// The block evaluates to the function item, which coerces to the
// function-pointer type of the constant.
dav1d_sgr_weighted2_8bpc_neon
};
}

#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
Expand Down Expand Up @@ -1204,6 +1237,25 @@ impl BitDepthLooprestorationArm for BitDepth16 {

dav1d_sgr_weighted1_16bpc_neon
};

/// Binds the trait constant to the external `dav1d_sgr_weighted2_16bpc_neon`
/// symbol for `BitDepth16`.
const dav1d_sgr_weighted2_neon: fn_dav1d_sgr_weighted2_neon<Self> = {
// The foreign function is declared inside the initializer block, so the
// symbol name is only visible here and is reached through this constant.
extern "C" {
fn dav1d_sgr_weighted2_16bpc_neon(
dst: *mut <BitDepth16 as BitDepth>::Pixel,
dst_stride: ptrdiff_t,
src: *const <BitDepth16 as BitDepth>::Pixel,
src_stride: ptrdiff_t,
t1: *const int16_t,
t2: *const int16_t,
w: libc::c_int,
h: libc::c_int,
wt: *const int16_t,
bitdepth_max: libc::c_int,
);
}

// The block evaluates to the function item, which coerces to the
// function-pointer type of the constant.
dav1d_sgr_weighted2_16bpc_neon
};
}

// TODO(randomPoison): Temporarily pub until callers are deduplicated.
Expand Down Expand Up @@ -1318,7 +1370,7 @@ pub(crate) unsafe fn dav1d_sgr_filter2_neon<BD: BitDepthLooprestorationArm>(
BD::dav1d_sgr_finish_filter2_neon(tmp, src, stride, a, b, w, h);
}

// TODO: Temporarily pub until init logic is deduplicated.
// TODO(randomPoison): Temporarily pub until init logic is deduplicated.
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
pub(crate) unsafe extern "C" fn sgr_filter_5x5_neon_erased<BD: BitDepthLooprestorationArm>(
p: *mut libc::c_void,
Expand Down Expand Up @@ -1445,3 +1497,82 @@ unsafe fn sgr_filter_3x3_neon<BD: BitDepthLooprestorationArm>(
bd.bitdepth_max().as_(),
);
}

// TODO(randomPoison): Temporarily pub until init logic is deduplicated.
/// Type-erased entry point for the "mix" self-guided filter.
///
/// Accepts the `void`-pointer based signature, restores the pixel types for
/// the bit depth `BD`, rebuilds the bit-depth value from `bitdepth_max`, and
/// forwards everything to `sgr_filter_mix_neon`.
///
/// # Safety
///
/// `p`, `left` and `lpf` are reinterpreted as pointers to `BD::Pixel` data
/// and handed, together with `params`, to `sgr_filter_mix_neon`; the caller
/// must satisfy that function's requirements.
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
pub(crate) unsafe extern "C" fn sgr_filter_mix_neon_erased<BD: BitDepthLooprestorationArm>(
    p: *mut libc::c_void,
    stride: ptrdiff_t,
    left: *const libc::c_void,
    lpf: *const libc::c_void,
    w: libc::c_int,
    h: libc::c_int,
    params: *const LooprestorationParams,
    edges: LrEdgeFlags,
    bitdepth_max: libc::c_int,
) {
    // Undo the type erasure: give every pointer its concrete pixel type.
    let dst = p.cast::<BD::Pixel>();
    let left_cols = left.cast::<[BD::Pixel; 4]>();
    let lpf_rows = lpf.cast::<BD::Pixel>();

    sgr_filter_mix_neon::<BD>(
        dst,
        stride,
        left_cols,
        lpf_rows,
        w,
        h,
        params,
        edges,
        BD::from_c(bitdepth_max),
    )
}

/// Applies the "mix" self-guided filter to `dst` in place.
///
/// Two intermediate results are produced into separate 16-byte aligned
/// scratch buffers:
///
/// 1. `dav1d_sgr_filter2_neon`, parameterised by `(*params).sgr.s0`, fills
///    the first buffer.
/// 2. `dav1d_sgr_filter1_neon`, parameterised by `(*params).sgr.s1`, fills
///    the second buffer.
///
/// `BD::dav1d_sgr_weighted2_neon` then combines both buffers with the
/// weights `[(*params).sgr.w0, (*params).sgr.w1]`, using `dst` as both its
/// source and its destination.
///
/// # Safety
///
/// `params` is dereferenced and must point to a valid
/// `LooprestorationParams`. `dst`, `left` and `lpf` are forwarded unchanged
/// to the filter routines.
// NOTE(review): the exact validity/extent requirements on `dst`, `left`,
// `lpf`, `w` and `h` are imposed by the called routines, which are not
// visible here -- confirm against them.
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
unsafe extern "C" fn sgr_filter_mix_neon<BD: BitDepthLooprestorationArm>(
    dst: *mut BD::Pixel,
    stride: ptrdiff_t,
    left: *const [BD::Pixel; 4],
    lpf: *const BD::Pixel,
    w: libc::c_int,
    h: libc::c_int,
    params: *const LooprestorationParams,
    edges: LrEdgeFlags,
    bd: BD,
) {
    // Scratch outputs of the two filter passes.
    let mut tmp1: Align16<[int16_t; 24576]> = Align16([0; 24576]);
    let mut tmp2: Align16<[int16_t; 24576]> = Align16([0; 24576]);

    dav1d_sgr_filter2_neon(
        tmp1.0.as_mut_ptr(),
        dst,
        stride,
        left,
        lpf,
        w,
        h,
        (*params).sgr.s0 as libc::c_int,
        edges,
        bd,
    );
    dav1d_sgr_filter1_neon(
        tmp2.0.as_mut_ptr(),
        dst,
        stride,
        left,
        lpf,
        w,
        h,
        (*params).sgr.s1 as libc::c_int,
        edges,
        bd,
    );

    let wt: [int16_t; 2] = [(*params).sgr.w0, (*params).sgr.w1];
    // The weighting routine takes both scratch buffers as `*const`, so hand
    // them over as read-only pointers. `dst` is passed as source and as
    // destination, i.e. the blend happens in place.
    BD::dav1d_sgr_weighted2_neon(
        dst,
        stride,
        dst,
        stride,
        tmp1.0.as_ptr(),
        tmp2.0.as_ptr(),
        w,
        h,
        wt.as_ptr(),
        bd.bitdepth_max().as_(),
    );
}
106 changes: 3 additions & 103 deletions src/looprestoration_tmpl_16.rs
Original file line number Diff line number Diff line change
@@ -1,27 +1,12 @@
use crate::include::common::bitdepth::BitDepth16;
use crate::include::stddef::*;
use crate::include::stdint::*;
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64"),))]
#[cfg(all(feature = "asm", target_arch = "arm"))]
use crate::src::align::Align16;
use ::libc;
#[cfg(feature = "asm")]
use cfg_if::cfg_if;

// Foreign declaration of the 16 bpc `sgr_weighted2` routine. It is only
// declared when the `asm` feature is enabled on `arm` or `aarch64` targets.
// NOTE(review): argument semantics (stride units, buffer layout) are defined
// by the external implementation, which is not visible here.
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
extern "C" {
fn dav1d_sgr_weighted2_16bpc_neon(
dst: *mut pixel,
dst_stride: ptrdiff_t,
src: *const pixel,
src_stride: ptrdiff_t,
t1: *const int16_t,
t2: *const int16_t,
w: libc::c_int,
h: libc::c_int,
wt: *const int16_t,
bitdepth_max: libc::c_int,
);
}
#[cfg(all(feature = "asm", target_arch = "arm"))]
extern "C" {
fn dav1d_wiener_filter_h_16bpc_neon(
Expand Down Expand Up @@ -52,7 +37,7 @@ pub type pixel = uint16_t;
/// Coefficient type used by this 16 bpc template: an alias for `int32_t`.
pub type coef = int32_t;
/// Const pointer to a row of four `pixel` values.
pub type const_left_pixel_row = *const [pixel; 4];

#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
#[cfg(all(feature = "asm", target_arch = "arm"))]
#[rustfmt::skip]
use crate::{
src::looprestoration::LrEdgeFlags,
Expand Down Expand Up @@ -241,7 +226,6 @@ unsafe extern "C" fn loop_restoration_dsp_init_arm(
) {
use crate::src::arm::cpu::DAV1D_ARM_CPU_FLAG_NEON;
// TODO(randomPoison): Import temporarily needed until init fns are deduplicated.
#[cfg(target_arch = "aarch64")]
use crate::src::looprestoration::*;

let flags: libc::c_uint = dav1d_get_cpu_flags();
Expand All @@ -263,94 +247,10 @@ unsafe extern "C" fn loop_restoration_dsp_init_arm(
if bpc == 10 {
(*c).sgr[0] = sgr_filter_5x5_neon_erased::<BitDepth16>;
(*c).sgr[1] = sgr_filter_3x3_neon_erased::<BitDepth16>;
(*c).sgr[2] = sgr_filter_mix_neon_erased;
(*c).sgr[2] = sgr_filter_mix_neon_erased::<BitDepth16>;
}
}



/// Type-erased entry point for the 16 bpc "mix" self-guided filter.
///
/// Restores the concrete `pixel` pointer types from the `void` pointers and
/// forwards all arguments to `sgr_filter_mix_neon`.
///
/// # Safety
///
/// `p`, `left` and `lpf` are reinterpreted as pointers to `pixel` data and
/// handed, together with `params`, to `sgr_filter_mix_neon`; the caller must
/// satisfy that function's requirements.
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
unsafe extern "C" fn sgr_filter_mix_neon_erased(
    p: *mut libc::c_void,
    stride: ptrdiff_t,
    left: *const libc::c_void,
    lpf: *const libc::c_void,
    w: libc::c_int,
    h: libc::c_int,
    params: *const LooprestorationParams,
    edges: LrEdgeFlags,
    bitdepth_max: libc::c_int,
) {
    // Undo the type erasure: give every pointer its concrete pixel type.
    let dst = p.cast::<pixel>();
    let left_cols = left.cast::<[pixel; 4]>();
    let lpf_rows = lpf.cast::<pixel>();

    sgr_filter_mix_neon(
        dst,
        stride,
        left_cols,
        lpf_rows,
        w,
        h,
        params,
        edges,
        bitdepth_max,
    )
}

/// Applies the 16 bpc "mix" self-guided filter to `dst` in place.
///
/// `dav1d_sgr_filter2_neon` (parameterised by `(*params).sgr.s0`) and
/// `dav1d_sgr_filter1_neon` (parameterised by `(*params).sgr.s1`) each fill
/// one 16-byte aligned scratch buffer. `dav1d_sgr_weighted2_16bpc_neon` then
/// combines both buffers with the weights
/// `[(*params).sgr.w0, (*params).sgr.w1]`, using `dst` as both its source
/// and its destination.
///
/// # Safety
///
/// `params` is dereferenced and must point to a valid
/// `LooprestorationParams`. `dst`, `left` and `lpf` are forwarded unchanged
/// to the filter routines.
// NOTE(review): the exact validity/extent requirements on `dst`, `left`,
// `lpf`, `w` and `h` are imposed by the called routines, which are not
// visible here -- confirm against them.
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
unsafe extern "C" fn sgr_filter_mix_neon(
    dst: *mut pixel,
    stride: ptrdiff_t,
    left: *const [pixel; 4],
    lpf: *const pixel,
    w: libc::c_int,
    h: libc::c_int,
    params: *const LooprestorationParams,
    edges: LrEdgeFlags,
    bitdepth_max: libc::c_int,
) {
    use crate::include::common::bitdepth::BitDepth;
    use crate::src::looprestoration::dav1d_sgr_filter1_neon;
    use crate::src::looprestoration::dav1d_sgr_filter2_neon;

    // Scratch outputs of the two filter passes.
    let mut tmp1: Align16<[int16_t; 24576]> = Align16([0; 24576]);
    let mut tmp2: Align16<[int16_t; 24576]> = Align16([0; 24576]);

    dav1d_sgr_filter2_neon(
        tmp1.0.as_mut_ptr(),
        dst,
        stride,
        left,
        lpf,
        w,
        h,
        (*params).sgr.s0 as libc::c_int,
        edges,
        BitDepth16::from_c(bitdepth_max),
    );
    dav1d_sgr_filter1_neon(
        tmp2.0.as_mut_ptr(),
        dst,
        stride,
        left,
        lpf,
        w,
        h,
        (*params).sgr.s1 as libc::c_int,
        edges,
        BitDepth16::from_c(bitdepth_max),
    );

    let wt: [int16_t; 2] = [(*params).sgr.w0, (*params).sgr.w1];
    // The weighting routine takes both scratch buffers as `*const`, so hand
    // them over as read-only pointers. `dst` is passed as source and as
    // destination, i.e. the blend happens in place.
    dav1d_sgr_weighted2_16bpc_neon(
        dst,
        stride,
        dst,
        stride,
        tmp1.0.as_ptr(),
        tmp2.0.as_ptr(),
        w,
        h,
        wt.as_ptr(),
        bitdepth_max,
    );
}

#[no_mangle]
#[cold]
pub unsafe extern "C" fn dav1d_loop_restoration_dsp_init_16bpc(
Expand Down
Loading

0 comments on commit 0efc634

Please sign in to comment.