Skip to content

Commit

Permalink
fn sgr_filter{5x5, 3x3, mix}: Deduplicate w/ generics (#346)
Browse files Browse the repository at this point in the history
  • Loading branch information
kkysen authored Aug 3, 2023
2 parents 04ee7fd + 0efc634 commit 8b99d69
Show file tree
Hide file tree
Showing 3 changed files with 318 additions and 482 deletions.
308 changes: 308 additions & 0 deletions src/looprestoration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1010,13 +1010,40 @@ type fn_dav1d_sgr_finish_filter_neon<BD> = unsafe extern "C" fn(
h: libc::c_int,
);

/// Function-pointer type of the single-buffer SGR weighting routine.
///
/// This is the signature shared by the `extern "C"` symbols
/// `dav1d_sgr_weighted1_8bpc_neon` and `dav1d_sgr_weighted1_16bpc_neon`,
/// which are stored per bit depth in
/// `BitDepthLooprestorationArm::dav1d_sgr_weighted1_neon`.
///
/// It takes a destination and a source pixel pointer (each with its own
/// stride), one `int16_t` buffer `t1`, the dimensions `w`/`h`, a scalar
/// weight `wt`, and `bitdepth_max`.
type fn_dav1d_sgr_weighted1_neon<BD> = unsafe extern "C" fn(
dst: *mut <BD as BitDepth>::Pixel,
dst_stride: ptrdiff_t,
src: *const <BD as BitDepth>::Pixel,
src_stride: ptrdiff_t,
t1: *const int16_t,
w: libc::c_int,
h: libc::c_int,
wt: libc::c_int,
bitdepth_max: libc::c_int,
);

/// Function-pointer type of the two-buffer SGR weighting routine.
///
/// This is the signature shared by the `extern "C"` symbols
/// `dav1d_sgr_weighted2_8bpc_neon` and `dav1d_sgr_weighted2_16bpc_neon`,
/// which are stored per bit depth in
/// `BitDepthLooprestorationArm::dav1d_sgr_weighted2_neon`.
///
/// Compared with the single-buffer variant it takes a second `int16_t`
/// buffer `t2`, and `wt` is a pointer rather than a scalar. The caller
/// in this file passes a pointer to a two-element `[int16_t; 2]` array.
type fn_dav1d_sgr_weighted2_neon<BD> = unsafe extern "C" fn(
dst: *mut <BD as BitDepth>::Pixel,
dst_stride: ptrdiff_t,
src: *const <BD as BitDepth>::Pixel,
src_stride: ptrdiff_t,
t1: *const int16_t,
t2: *const int16_t,
w: libc::c_int,
h: libc::c_int,
wt: *const int16_t,
bitdepth_max: libc::c_int,
);

// TODO(randomPoison): Temporarily pub until all usages can be made private.
/// Per-bit-depth table of the external NEON SGR routines.
///
/// Each associated constant holds the `extern "C"` function that matches the
/// implementing bit depth (`BitDepth8` binds the `*_8bpc_neon` symbols,
/// `BitDepth16` the `*_16bpc_neon` ones), so generic code can call
/// `BD::dav1d_sgr_*_neon(..)` without naming a concrete symbol.
///
/// Only compiled when the `asm` feature is enabled on `arm`/`aarch64`.
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
pub(crate) trait BitDepthLooprestorationArm: BitDepth {
// Horizontal box routines; both share one function-pointer type.
const dav1d_sgr_box3_h_neon: fn_dav1d_sgr_box_h_neon<Self>;
const dav1d_sgr_box5_h_neon: fn_dav1d_sgr_box_h_neon<Self>;
// Finish-filter routines; both share one function-pointer type.
const dav1d_sgr_finish_filter1_neon: fn_dav1d_sgr_finish_filter_neon<Self>;
const dav1d_sgr_finish_filter2_neon: fn_dav1d_sgr_finish_filter_neon<Self>;
// Weighting routines: one intermediate buffer vs. two intermediate buffers.
const dav1d_sgr_weighted1_neon: fn_dav1d_sgr_weighted1_neon<Self>;
const dav1d_sgr_weighted2_neon: fn_dav1d_sgr_weighted2_neon<Self>;
}

#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
Expand Down Expand Up @@ -1086,6 +1113,43 @@ impl BitDepthLooprestorationArm for BitDepth8 {

dav1d_sgr_finish_filter2_8bpc_neon
};

// 8 bpc binding of the single-buffer weighting routine.
// The external symbol is declared inside the const initializer so that its
// name stays local to this item; the block then evaluates to that function.
const dav1d_sgr_weighted1_neon: fn_dav1d_sgr_weighted1_neon<Self> = {
extern "C" {
fn dav1d_sgr_weighted1_8bpc_neon(
dst: *mut <BitDepth8 as BitDepth>::Pixel,
dst_stride: ptrdiff_t,
src: *const <BitDepth8 as BitDepth>::Pixel,
src_stride: ptrdiff_t,
t1: *const int16_t,
w: libc::c_int,
h: libc::c_int,
wt: libc::c_int,
bitdepth_max: libc::c_int,
);
}

dav1d_sgr_weighted1_8bpc_neon
};

// 8 bpc binding of the two-buffer weighting routine.
// The external symbol is declared inside the const initializer so that its
// name stays local to this item; the block then evaluates to that function.
const dav1d_sgr_weighted2_neon: fn_dav1d_sgr_weighted2_neon<Self> = {
extern "C" {
fn dav1d_sgr_weighted2_8bpc_neon(
dst: *mut <BitDepth8 as BitDepth>::Pixel,
dst_stride: ptrdiff_t,
src: *const <BitDepth8 as BitDepth>::Pixel,
src_stride: ptrdiff_t,
t1: *const int16_t,
t2: *const int16_t,
w: libc::c_int,
h: libc::c_int,
wt: *const int16_t,
bitdepth_max: libc::c_int,
);
}

dav1d_sgr_weighted2_8bpc_neon
};
}

#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
Expand Down Expand Up @@ -1155,6 +1219,43 @@ impl BitDepthLooprestorationArm for BitDepth16 {

dav1d_sgr_finish_filter2_16bpc_neon
};

// 16 bpc binding of the single-buffer weighting routine.
// The external symbol is declared inside the const initializer so that its
// name stays local to this item; the block then evaluates to that function.
const dav1d_sgr_weighted1_neon: fn_dav1d_sgr_weighted1_neon<Self> = {
extern "C" {
fn dav1d_sgr_weighted1_16bpc_neon(
dst: *mut <BitDepth16 as BitDepth>::Pixel,
dst_stride: ptrdiff_t,
src: *const <BitDepth16 as BitDepth>::Pixel,
src_stride: ptrdiff_t,
t1: *const int16_t,
w: libc::c_int,
h: libc::c_int,
wt: libc::c_int,
bitdepth_max: libc::c_int,
);
}

dav1d_sgr_weighted1_16bpc_neon
};

// 16 bpc binding of the two-buffer weighting routine.
// The external symbol is declared inside the const initializer so that its
// name stays local to this item; the block then evaluates to that function.
const dav1d_sgr_weighted2_neon: fn_dav1d_sgr_weighted2_neon<Self> = {
extern "C" {
fn dav1d_sgr_weighted2_16bpc_neon(
dst: *mut <BitDepth16 as BitDepth>::Pixel,
dst_stride: ptrdiff_t,
src: *const <BitDepth16 as BitDepth>::Pixel,
src_stride: ptrdiff_t,
t1: *const int16_t,
t2: *const int16_t,
w: libc::c_int,
h: libc::c_int,
wt: *const int16_t,
bitdepth_max: libc::c_int,
);
}

dav1d_sgr_weighted2_16bpc_neon
};
}

// TODO(randomPoison): Temporarily pub until callers are deduplicated.
Expand Down Expand Up @@ -1268,3 +1369,210 @@ pub(crate) unsafe fn dav1d_sgr_filter2_neon<BD: BitDepthLooprestorationArm>(
dav1d_sgr_calc_ab2_neon(a, b, w, h, strength, bd.bitdepth_max().as_());
BD::dav1d_sgr_finish_filter2_neon(tmp, src, stride, a, b, w, h);
}

// TODO(randomPoison): Temporarily pub until init logic is deduplicated.
/// Type-erased `extern "C"` entry point for `sgr_filter_5x5_neon`.
///
/// Casts the `c_void` pointers to the pixel types of `BD`, rebuilds the
/// bit-depth value from `bitdepth_max` with `BD::from_c`, and forwards all
/// remaining arguments unchanged.
///
/// # Safety
///
/// The pointers are handed straight to `sgr_filter_5x5_neon`, so they must
/// satisfy whatever that function requires for `BD`'s pixel type.
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
pub(crate) unsafe extern "C" fn sgr_filter_5x5_neon_erased<BD: BitDepthLooprestorationArm>(
    p: *mut libc::c_void,
    stride: ptrdiff_t,
    left: *const libc::c_void,
    lpf: *const libc::c_void,
    w: libc::c_int,
    h: libc::c_int,
    params: *const LooprestorationParams,
    edges: LrEdgeFlags,
    bitdepth_max: libc::c_int,
) {
    // Recover the typed views of the erased pointers.
    let dst = p.cast::<BD::Pixel>();
    let left = left.cast::<[BD::Pixel; 4]>();
    let lpf = lpf.cast::<BD::Pixel>();
    let bd = BD::from_c(bitdepth_max);

    sgr_filter_5x5_neon::<BD>(dst, stride, left, lpf, w, h, params, edges, bd)
}

/// Filters `dst` using the `sgr.s0` strength and `sgr.w0` weight of `params`.
///
/// Two steps:
/// 1. `dav1d_sgr_filter2_neon` fills a 16-byte-aligned stack scratch buffer
///    from `dst`, `left` and `lpf`, using strength `(*params).sgr.s0`.
/// 2. `BD::dav1d_sgr_weighted1_neon` combines that buffer with `dst`, which is
///    passed as both source and destination, using weight `(*params).sgr.w0`.
///
/// # Safety
///
/// `params` is dereferenced and must point to a readable
/// `LooprestorationParams` whose `sgr` field is valid to read. `dst`, `left`
/// and `lpf` are forwarded to the routines above; the extents they must cover
/// are determined by those routines and are not visible here.
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
unsafe fn sgr_filter_5x5_neon<BD: BitDepthLooprestorationArm>(
    dst: *mut BD::Pixel,
    stride: ptrdiff_t,
    left: *const [BD::Pixel; 4],
    lpf: *const BD::Pixel,
    w: libc::c_int,
    h: libc::c_int,
    params: *const LooprestorationParams,
    edges: LrEdgeFlags,
    bd: BD,
) {
    // Scratch output of the filter pass. Take the raw pointer once so that
    // both calls use the same pointer.
    let mut tmp: Align16<[int16_t; 24576]> = Align16([0; 24576]);
    let tmp_ptr = tmp.0.as_mut_ptr();

    dav1d_sgr_filter2_neon(
        tmp_ptr,
        dst,
        stride,
        left,
        lpf,
        w,
        h,
        (*params).sgr.s0 as libc::c_int,
        edges,
        bd,
    );

    // `dst` serves as both input and output of the weighting step.
    BD::dav1d_sgr_weighted1_neon(
        dst,
        stride,
        dst,
        stride,
        tmp_ptr,
        w,
        h,
        (*params).sgr.w0 as libc::c_int,
        bd.bitdepth_max().as_(),
    );
}

// TODO(randomPoison): Temporarily pub until init logic is deduplicated.
/// Type-erased `extern "C"` entry point for `sgr_filter_3x3_neon`.
///
/// Casts the `c_void` pointers to the pixel types of `BD`, rebuilds the
/// bit-depth value from `bitdepth_max` with `BD::from_c`, and forwards all
/// remaining arguments unchanged.
///
/// # Safety
///
/// The pointers are handed straight to `sgr_filter_3x3_neon`, so they must
/// satisfy whatever that function requires for `BD`'s pixel type.
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
pub(crate) unsafe extern "C" fn sgr_filter_3x3_neon_erased<BD: BitDepthLooprestorationArm>(
    p: *mut libc::c_void,
    stride: ptrdiff_t,
    left: *const libc::c_void,
    lpf: *const libc::c_void,
    w: libc::c_int,
    h: libc::c_int,
    params: *const LooprestorationParams,
    edges: LrEdgeFlags,
    bitdepth_max: libc::c_int,
) {
    // Recover the typed views of the erased pointers.
    let dst = p.cast::<BD::Pixel>();
    let left = left.cast::<[BD::Pixel; 4]>();
    let lpf = lpf.cast::<BD::Pixel>();
    let bd = BD::from_c(bitdepth_max);

    sgr_filter_3x3_neon::<BD>(dst, stride, left, lpf, w, h, params, edges, bd)
}

/// Filters `dst` using the `sgr.s1` strength and `sgr.w1` weight of `params`.
///
/// Two steps:
/// 1. `dav1d_sgr_filter1_neon` fills a 16-byte-aligned stack scratch buffer
///    from `dst`, `left` and `lpf`, using strength `(*params).sgr.s1`.
/// 2. `BD::dav1d_sgr_weighted1_neon` combines that buffer with `dst`, which is
///    passed as both source and destination, using weight `(*params).sgr.w1`.
///
/// # Safety
///
/// `params` is dereferenced and must point to a readable
/// `LooprestorationParams` whose `sgr` field is valid to read. `dst`, `left`
/// and `lpf` are forwarded to the routines above; the extents they must cover
/// are determined by those routines and are not visible here.
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
unsafe fn sgr_filter_3x3_neon<BD: BitDepthLooprestorationArm>(
    dst: *mut BD::Pixel,
    stride: ptrdiff_t,
    left: *const [BD::Pixel; 4],
    lpf: *const BD::Pixel,
    w: libc::c_int,
    h: libc::c_int,
    params: *const LooprestorationParams,
    edges: LrEdgeFlags,
    bd: BD,
) {
    // Scratch output of the filter pass. Take the raw pointer once so that
    // both calls use the same pointer.
    let mut tmp: Align16<[int16_t; 24576]> = Align16([0; 24576]);
    let tmp_ptr = tmp.0.as_mut_ptr();

    dav1d_sgr_filter1_neon(
        tmp_ptr,
        dst,
        stride,
        left,
        lpf,
        w,
        h,
        (*params).sgr.s1 as libc::c_int,
        edges,
        bd,
    );

    // `dst` serves as both input and output of the weighting step.
    BD::dav1d_sgr_weighted1_neon(
        dst,
        stride,
        dst,
        stride,
        tmp_ptr,
        w,
        h,
        (*params).sgr.w1 as libc::c_int,
        bd.bitdepth_max().as_(),
    );
}

// TODO(randomPoison): Temporarily pub until init logic is deduplicated.
/// Type-erased `extern "C"` entry point for `sgr_filter_mix_neon`.
///
/// Casts the `c_void` pointers to the pixel types of `BD`, rebuilds the
/// bit-depth value from `bitdepth_max` with `BD::from_c`, and forwards all
/// remaining arguments unchanged.
///
/// # Safety
///
/// The pointers are handed straight to `sgr_filter_mix_neon`, so they must
/// satisfy whatever that function requires for `BD`'s pixel type.
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
pub(crate) unsafe extern "C" fn sgr_filter_mix_neon_erased<BD: BitDepthLooprestorationArm>(
    p: *mut libc::c_void,
    stride: ptrdiff_t,
    left: *const libc::c_void,
    lpf: *const libc::c_void,
    w: libc::c_int,
    h: libc::c_int,
    params: *const LooprestorationParams,
    edges: LrEdgeFlags,
    bitdepth_max: libc::c_int,
) {
    // Recover the typed views of the erased pointers.
    let dst = p.cast::<BD::Pixel>();
    let left = left.cast::<[BD::Pixel; 4]>();
    let lpf = lpf.cast::<BD::Pixel>();
    let bd = BD::from_c(bitdepth_max);

    sgr_filter_mix_neon::<BD>(dst, stride, left, lpf, w, h, params, edges, bd)
}

/// Filters `dst` with both filter passes and blends the two results.
///
/// Three steps:
/// 1. `dav1d_sgr_filter2_neon` fills the first scratch buffer using strength
///    `(*params).sgr.s0`.
/// 2. `dav1d_sgr_filter1_neon` fills the second scratch buffer using strength
///    `(*params).sgr.s1`.
/// 3. `BD::dav1d_sgr_weighted2_neon` combines both buffers with `dst`, which
///    is passed as both source and destination, using the weight pair
///    `[sgr.w0, sgr.w1]`.
///
/// NOTE(review): unlike `sgr_filter_5x5_neon` and `sgr_filter_3x3_neon`, this
/// function is declared `extern "C"` although it takes a `BD` value and is
/// only called from Rust in the visible code. The ABI is left as-is here;
/// confirm whether it can be dropped for consistency.
///
/// # Safety
///
/// `params` is dereferenced and must point to a readable
/// `LooprestorationParams` whose `sgr` field is valid to read. `dst`, `left`
/// and `lpf` are forwarded to the routines above; the extents they must cover
/// are determined by those routines and are not visible here.
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
unsafe extern "C" fn sgr_filter_mix_neon<BD: BitDepthLooprestorationArm>(
    dst: *mut BD::Pixel,
    stride: ptrdiff_t,
    left: *const [BD::Pixel; 4],
    lpf: *const BD::Pixel,
    w: libc::c_int,
    h: libc::c_int,
    params: *const LooprestorationParams,
    edges: LrEdgeFlags,
    bd: BD,
) {
    // One scratch buffer per filter pass. Take each raw pointer once so that
    // the filter call and the weighting call use the same pointer.
    let mut tmp1: Align16<[int16_t; 24576]> = Align16([0; 24576]);
    let mut tmp2: Align16<[int16_t; 24576]> = Align16([0; 24576]);
    let tmp1_ptr = tmp1.0.as_mut_ptr();
    let tmp2_ptr = tmp2.0.as_mut_ptr();

    dav1d_sgr_filter2_neon(
        tmp1_ptr,
        dst,
        stride,
        left,
        lpf,
        w,
        h,
        (*params).sgr.s0 as libc::c_int,
        edges,
        bd,
    );
    dav1d_sgr_filter1_neon(
        tmp2_ptr,
        dst,
        stride,
        left,
        lpf,
        w,
        h,
        (*params).sgr.s1 as libc::c_int,
        edges,
        bd,
    );

    // The weighting routine takes the two weights through a pointer.
    let wt: [int16_t; 2] = [(*params).sgr.w0, (*params).sgr.w1];
    BD::dav1d_sgr_weighted2_neon(
        dst,
        stride,
        dst,
        stride,
        tmp1_ptr,
        tmp2_ptr,
        w,
        h,
        wt.as_ptr(),
        bd.bitdepth_max().as_(),
    );
}
Loading

0 comments on commit 8b99d69

Please sign in to comment.