Skip to content

Commit

Permalink
fn dav1d_sgr_filter2_neon: Deduplicate w/ generics (#343)
Browse files Browse the repository at this point in the history
  • Loading branch information
randomPoison authored Aug 3, 2023
1 parent 0099481 commit 04ee7fd
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 193 deletions.
155 changes: 147 additions & 8 deletions src/looprestoration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,21 @@ extern "C" {
strength: libc::c_int,
bitdepth_max: libc::c_int,
);
/// Foreign routine operating in place on the `sumsq` / `sum` accumulation buffers.
///
/// `dav1d_sgr_filter2_neon` calls it once, after all of its `dav1d_sgr_box5_h_neon` passes,
/// with the same `sumsq`/`sum` pointers and the same `w`, `h` and `edges` values.
/// The symbol has no bit-depth suffix and is shared by the generic caller.
/// NOTE(review): presumably the vertical half of the 5x5 box sum (judging by the name) -- confirm
/// against the assembly source.
fn dav1d_sgr_box5_v_neon(
sumsq: *mut int32_t,
sum: *mut int16_t,
w: libc::c_int,
h: libc::c_int,
edges: LrEdgeFlags,
);
/// Foreign routine that rewrites the `a` / `b` buffers in place (both are `*mut`).
///
/// `dav1d_sgr_filter2_neon` passes the very same pointers it used as `sumsq` / `sum`, so the
/// box sums are its input; the result is then read through `*const` pointers by
/// `dav1d_sgr_finish_filter2_neon`. `bitdepth_max` is supplied by the caller from
/// `bd.bitdepth_max()`.
fn dav1d_sgr_calc_ab2_neon(
a: *mut int32_t,
b: *mut int16_t,
w: libc::c_int,
h: libc::c_int,
strength: libc::c_int,
bitdepth_max: libc::c_int,
);
}

// 256 * 1.5 + 3 + 3 = 390
Expand Down Expand Up @@ -974,7 +989,7 @@ unsafe fn sgr_mix_rust<BD: BitDepth>(
}
}

type fn_dav1d_sgr_box3_h_neon<BD> = unsafe extern "C" fn(
type fn_dav1d_sgr_box_h_neon<BD> = unsafe extern "C" fn(
sumsq: *mut int32_t,
sum: *mut int16_t,
left: *const [<BD as BitDepth>::Pixel; 4],
Expand All @@ -985,7 +1000,7 @@ type fn_dav1d_sgr_box3_h_neon<BD> = unsafe extern "C" fn(
edges: LrEdgeFlags,
);

type fn_dav1d_sgr_finish_filter1_neon<BD> = unsafe extern "C" fn(
type fn_dav1d_sgr_finish_filter_neon<BD> = unsafe extern "C" fn(
tmp: *mut int16_t,
src: *const <BD as BitDepth>::Pixel,
stride: ptrdiff_t,
Expand All @@ -998,13 +1013,15 @@ type fn_dav1d_sgr_finish_filter1_neon<BD> = unsafe extern "C" fn(
// TODO(randomPoison): Temporarily pub until all usages can be made private.
/// Per-bit-depth table of NEON assembly entry points used by the loop-restoration `sgr` filters.
///
/// Each associated constant is an `unsafe extern "C"` function pointer. The `BitDepth8` and
/// `BitDepth16` impls bind them to the matching `*_8bpc_neon` / `*_16bpc_neon` symbols, so
/// generic code (`dav1d_sgr_filter1_neon`, `dav1d_sgr_filter2_neon`) can call
/// `BD::dav1d_sgr_..._neon(..)` without duplicating the function body per bit depth.
///
/// The `box3_h`/`box5_h` constants share one pointer type, as do `finish_filter1`/`finish_filter2`.
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
pub(crate) trait BitDepthLooprestorationArm: BitDepth {
const dav1d_sgr_box3_h_neon: fn_dav1d_sgr_box3_h_neon<Self>;
const dav1d_sgr_finish_filter1_neon: fn_dav1d_sgr_finish_filter1_neon<Self>;
const dav1d_sgr_box3_h_neon: fn_dav1d_sgr_box_h_neon<Self>;
const dav1d_sgr_box5_h_neon: fn_dav1d_sgr_box_h_neon<Self>;
const dav1d_sgr_finish_filter1_neon: fn_dav1d_sgr_finish_filter_neon<Self>;
const dav1d_sgr_finish_filter2_neon: fn_dav1d_sgr_finish_filter_neon<Self>;
}

#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
impl BitDepthLooprestorationArm for BitDepth8 {
const dav1d_sgr_box3_h_neon: fn_dav1d_sgr_box3_h_neon<Self> = {
const dav1d_sgr_box3_h_neon: fn_dav1d_sgr_box_h_neon<Self> = {
extern "C" {
fn dav1d_sgr_box3_h_8bpc_neon(
sumsq: *mut int32_t,
Expand All @@ -1021,7 +1038,24 @@ impl BitDepthLooprestorationArm for BitDepth8 {
dav1d_sgr_box3_h_8bpc_neon
};

const dav1d_sgr_finish_filter1_neon: fn_dav1d_sgr_finish_filter1_neon<Self> = {
/// 8 bpc binding: resolves to the `dav1d_sgr_box5_h_8bpc_neon` assembly symbol.
///
/// The foreign declaration lives inside the initializer block, so the suffixed symbol name is
/// only reachable through this constant.
const dav1d_sgr_box5_h_neon: fn_dav1d_sgr_box_h_neon<Self> = {
extern "C" {
// The signature must coerce to `fn_dav1d_sgr_box_h_neon<BitDepth8>`. The pixel type is
// spelled `<BitDepth8 as BitDepth>::Pixel` because a nested item cannot name the
// outer `Self`.
fn dav1d_sgr_box5_h_8bpc_neon(
sumsq: *mut int32_t,
sum: *mut int16_t,
left: *const [<BitDepth8 as BitDepth>::Pixel; 4],
src: *const <BitDepth8 as BitDepth>::Pixel,
stride: ptrdiff_t,
w: libc::c_int,
h: libc::c_int,
edges: LrEdgeFlags,
);
}

// The block evaluates to the function item, which becomes the constant's value.
dav1d_sgr_box5_h_8bpc_neon
};

const dav1d_sgr_finish_filter1_neon: fn_dav1d_sgr_finish_filter_neon<Self> = {
extern "C" {
fn dav1d_sgr_finish_filter1_8bpc_neon(
tmp: *mut int16_t,
Expand All @@ -1036,11 +1070,27 @@ impl BitDepthLooprestorationArm for BitDepth8 {

dav1d_sgr_finish_filter1_8bpc_neon
};

/// 8 bpc binding: resolves to the `dav1d_sgr_finish_filter2_8bpc_neon` assembly symbol.
///
/// The foreign declaration lives inside the initializer block, so the suffixed symbol name is
/// only reachable through this constant.
const dav1d_sgr_finish_filter2_neon: fn_dav1d_sgr_finish_filter_neon<Self> = {
extern "C" {
// The signature must coerce to `fn_dav1d_sgr_finish_filter_neon<BitDepth8>`: `tmp` is the
// only mutable pointer; `src`, `a` and `b` are read-only (`*const`).
fn dav1d_sgr_finish_filter2_8bpc_neon(
tmp: *mut int16_t,
src: *const <BitDepth8 as BitDepth>::Pixel,
stride: ptrdiff_t,
a: *const int32_t,
b: *const int16_t,
w: libc::c_int,
h: libc::c_int,
);
}

// The block evaluates to the function item, which becomes the constant's value.
dav1d_sgr_finish_filter2_8bpc_neon
};
}

#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
impl BitDepthLooprestorationArm for BitDepth16 {
const dav1d_sgr_box3_h_neon: fn_dav1d_sgr_box3_h_neon<Self> = {
const dav1d_sgr_box3_h_neon: fn_dav1d_sgr_box_h_neon<Self> = {
extern "C" {
fn dav1d_sgr_box3_h_16bpc_neon(
sumsq: *mut int32_t,
Expand All @@ -1057,7 +1107,24 @@ impl BitDepthLooprestorationArm for BitDepth16 {
dav1d_sgr_box3_h_16bpc_neon
};

const dav1d_sgr_finish_filter1_neon: fn_dav1d_sgr_finish_filter1_neon<Self> = {
/// 16 bpc binding: resolves to the `dav1d_sgr_box5_h_16bpc_neon` assembly symbol.
///
/// The foreign declaration lives inside the initializer block, so the suffixed symbol name is
/// only reachable through this constant.
const dav1d_sgr_box5_h_neon: fn_dav1d_sgr_box_h_neon<Self> = {
extern "C" {
// The signature must coerce to `fn_dav1d_sgr_box_h_neon<BitDepth16>`. The pixel type is
// spelled `<BitDepth16 as BitDepth>::Pixel` because a nested item cannot name the
// outer `Self`.
fn dav1d_sgr_box5_h_16bpc_neon(
sumsq: *mut int32_t,
sum: *mut int16_t,
left: *const [<BitDepth16 as BitDepth>::Pixel; 4],
src: *const <BitDepth16 as BitDepth>::Pixel,
stride: ptrdiff_t,
w: libc::c_int,
h: libc::c_int,
edges: LrEdgeFlags,
);
}

// The block evaluates to the function item, which becomes the constant's value.
dav1d_sgr_box5_h_16bpc_neon
};

const dav1d_sgr_finish_filter1_neon: fn_dav1d_sgr_finish_filter_neon<Self> = {
extern "C" {
fn dav1d_sgr_finish_filter1_16bpc_neon(
tmp: *mut int16_t,
Expand All @@ -1072,6 +1139,22 @@ impl BitDepthLooprestorationArm for BitDepth16 {

dav1d_sgr_finish_filter1_16bpc_neon
};

/// 16 bpc binding: resolves to the `dav1d_sgr_finish_filter2_16bpc_neon` assembly symbol.
///
/// The foreign declaration lives inside the initializer block, so the suffixed symbol name is
/// only reachable through this constant.
const dav1d_sgr_finish_filter2_neon: fn_dav1d_sgr_finish_filter_neon<Self> = {
extern "C" {
// The signature must coerce to `fn_dav1d_sgr_finish_filter_neon<BitDepth16>`: `tmp` is the
// only mutable pointer; `src`, `a` and `b` are read-only (`*const`).
fn dav1d_sgr_finish_filter2_16bpc_neon(
tmp: *mut int16_t,
src: *const <BitDepth16 as BitDepth>::Pixel,
stride: ptrdiff_t,
a: *const int32_t,
b: *const int16_t,
w: libc::c_int,
h: libc::c_int,
);
}

// The block evaluates to the function item, which becomes the constant's value.
dav1d_sgr_finish_filter2_16bpc_neon
};
}

// TODO(randomPoison): Temporarily pub until callers are deduplicated.
Expand Down Expand Up @@ -1129,3 +1212,59 @@ pub(crate) unsafe fn dav1d_sgr_filter1_neon<BD: BitDepthLooprestorationArm>(
dav1d_sgr_calc_ab1_neon(a, b, w, h, strength, bd.bitdepth_max().as_());
BD::dav1d_sgr_finish_filter1_neon(tmp, src, stride, a, b, w, h);
}

// TODO(randomPoison): Temporarily pub until callers are deduplicated.
/// Runs the NEON `sgr` "filter 2" (5x5 box) pipeline for one block, generic over bit depth.
///
/// Stages, in order:
/// 1. `BD::dav1d_sgr_box5_h_neon` over the `h` rows at `src`, then over 2 rows taken from `lpf`
///    (stored 2 rows *before* the main area) when `edges` has `LR_HAVE_TOP`, and over 2 rows
///    taken 6 pixel-strides into `lpf` (stored at row `h`) when `edges` has `LR_HAVE_BOTTOM`.
/// 2. `dav1d_sgr_box5_v_neon` over the same accumulation buffers.
/// 3. `dav1d_sgr_calc_ab2_neon`, in place: `a` aliases `sumsq` and `b` aliases `sum`.
/// 4. `BD::dav1d_sgr_finish_filter2_neon`, which writes the result to `tmp`.
///
/// The bit-depth-specific stages are dispatched through [`BitDepthLooprestorationArm`].
///
/// # Safety
///
/// Every pointer is forwarded unchecked to external assembly routines, so the caller must
/// guarantee they are valid for whatever those routines access.
/// NOTE(review): the scratch buffers use a fixed pitch of `384 + 16` elements per row and hold
/// 68 such rows plus padding, so `w` and `h` are presumably bounded accordingly -- confirm
/// against the callers.
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
pub(crate) unsafe fn dav1d_sgr_filter2_neon<BD: BitDepthLooprestorationArm>(
    tmp: *mut int16_t,
    src: *const BD::Pixel,
    stride: ptrdiff_t,
    left: *const [BD::Pixel; 4],
    lpf: *const BD::Pixel,
    w: libc::c_int,
    h: libc::c_int,
    strength: libc::c_int,
    edges: LrEdgeFlags,
    bd: BD,
) {
    // Elements per scratch row; shared by the `sumsq` and `sum` buffers.
    const ROW_PITCH: libc::c_int = 384 + 16;

    // 27208 = 68 * ROW_PITCH + 8. The working pointer skips two rows (the slot for the rows
    // above the block) plus 8 elements of padding.
    let mut sumsq_mem: Align16<[int32_t; 27208]> = Align16([0; 27208]);
    // Plain pointer arithmetic: going through `&mut *ptr` would produce a reference to a single
    // element, while the assembly routines write whole rows through this pointer.
    let sumsq: *mut int32_t = sumsq_mem
        .0
        .as_mut_ptr()
        .offset((ROW_PITCH * 2 + 8) as isize);
    let a: *mut int32_t = sumsq;

    // 27216 = 68 * ROW_PITCH + 16. Same layout, with 16 elements of padding.
    let mut sum_mem: Align16<[int16_t; 27216]> = Align16([0; 27216]);
    let sum: *mut int16_t = sum_mem
        .0
        .as_mut_ptr()
        .offset((ROW_PITCH * 2 + 16) as isize);
    let b: *mut int16_t = sum;

    let have_top = edges as libc::c_uint & LR_HAVE_TOP as libc::c_int as libc::c_uint != 0;
    let have_bottom = edges as libc::c_uint & LR_HAVE_BOTTOM as libc::c_int as libc::c_uint != 0;

    // Horizontal sums for the block itself.
    BD::dav1d_sgr_box5_h_neon(sumsq, sum, left, src, stride, w, h, edges);

    if have_top {
        // Two rows from `lpf`, stored in the two scratch rows preceding the block.
        // No `left` column data is supplied for these rows.
        let above = (-2 * ROW_PITCH) as isize;
        BD::dav1d_sgr_box5_h_neon(
            sumsq.offset(above),
            sum.offset(above),
            core::ptr::null(),
            lpf,
            stride,
            w,
            2,
            edges,
        );
    }

    if have_bottom {
        // Two rows starting 6 pixel-strides into `lpf`, stored right after the block's rows.
        let below = (h * ROW_PITCH) as isize;
        BD::dav1d_sgr_box5_h_neon(
            sumsq.offset(below),
            sum.offset(below),
            core::ptr::null(),
            lpf.offset((6 * BD::pxstride(stride as usize)) as isize),
            stride,
            w,
            2,
            edges,
        );
    }

    dav1d_sgr_box5_v_neon(sumsq, sum, w, h, edges);
    // `a`/`b` alias `sumsq`/`sum`: the coefficients overwrite the box sums in place.
    dav1d_sgr_calc_ab2_neon(a, b, w, h, strength, bd.bitdepth_max().as_());
    BD::dav1d_sgr_finish_filter2_neon(tmp, src, stride, a, b, w, h);
}
100 changes: 6 additions & 94 deletions src/looprestoration_tmpl_16.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,6 @@ use cfg_if::cfg_if;

#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
extern "C" {
// Foreign routine working in place on the `sumsq` / `sum` buffers; this file's
// `dav1d_sgr_filter2_neon` calls it after its `dav1d_sgr_box5_h_16bpc_neon` passes.
fn dav1d_sgr_box5_v_neon(
sumsq: *mut int32_t,
sum: *mut int16_t,
w: libc::c_int,
h: libc::c_int,
edges: LrEdgeFlags,
);
// Foreign routine that rewrites `a` / `b` in place; this file's `dav1d_sgr_filter2_neon`
// passes the same pointers it used as `sumsq` / `sum`.
fn dav1d_sgr_calc_ab2_neon(
a: *mut int32_t,
b: *mut int16_t,
w: libc::c_int,
h: libc::c_int,
strength: libc::c_int,
bitdepth_max: libc::c_int,
);
// Final stage called by this file's `dav1d_sgr_filter2_neon`: writes `tmp`; `src`, `a` and `b`
// are only read (`*const`).
fn dav1d_sgr_finish_filter2_16bpc_neon(
tmp: *mut int16_t,
src: *const pixel,
stride: ptrdiff_t,
a: *const int32_t,
b: *const int16_t,
w: libc::c_int,
h: libc::c_int,
);
fn dav1d_sgr_weighted2_16bpc_neon(
dst: *mut pixel,
dst_stride: ptrdiff_t,
Expand All @@ -56,16 +32,6 @@ extern "C" {
wt: libc::c_int,
bitdepth_max: libc::c_int,
);
// First stage called by this file's `dav1d_sgr_filter2_neon`: reads `src` (and `left`, which
// the caller passes as null for the extra rows above/below) and writes `sumsq` / `sum`.
fn dav1d_sgr_box5_h_16bpc_neon(
sumsq: *mut int32_t,
sum: *mut int16_t,
left: *const [pixel; 4],
src: *const pixel,
stride: ptrdiff_t,
w: libc::c_int,
h: libc::c_int,
edges: LrEdgeFlags,
);
}
#[cfg(all(feature = "asm", target_arch = "arm"))]
extern "C" {
Expand Down Expand Up @@ -378,64 +344,6 @@ unsafe extern "C" fn sgr_filter_3x3_neon(
);
}

/// Runs the 16 bpc NEON `sgr` "filter 2" (5x5 box) pipeline for one block.
///
/// Stages, in order:
/// 1. `dav1d_sgr_box5_h_16bpc_neon` over the `h` rows at `src`, then over 2 rows taken from
///    `lpf` (stored 2 rows *before* the main area) when `edges` has `LR_HAVE_TOP`, and over
///    2 rows taken 6 pixel-strides into `lpf` (stored at row `h`) when `edges` has
///    `LR_HAVE_BOTTOM`.
/// 2. `dav1d_sgr_box5_v_neon` over the same accumulation buffers.
/// 3. `dav1d_sgr_calc_ab2_neon`, in place: `a` aliases `sumsq` and `b` aliases `sum`.
/// 4. `dav1d_sgr_finish_filter2_16bpc_neon`, which writes the result to `tmp`.
///
/// # Safety
///
/// Every pointer is forwarded unchecked to external assembly routines, so the caller must
/// guarantee they are valid for whatever those routines access.
/// NOTE(review): the scratch buffers use a fixed pitch of `384 + 16` elements per row and hold
/// 68 such rows plus padding, so `w` and `h` are presumably bounded accordingly -- confirm
/// against the callers.
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
unsafe extern "C" fn dav1d_sgr_filter2_neon(
    tmp: *mut int16_t,
    src: *const pixel,
    stride: ptrdiff_t,
    left: *const [pixel; 4],
    lpf: *const pixel,
    w: libc::c_int,
    h: libc::c_int,
    strength: libc::c_int,
    edges: LrEdgeFlags,
    bitdepth_max: libc::c_int,
) {
    use crate::src::looprestoration::LR_HAVE_BOTTOM;
    use crate::src::looprestoration::LR_HAVE_TOP;

    // Elements per scratch row; shared by the `sumsq` and `sum` buffers.
    const ROW_PITCH: libc::c_int = 384 + 16;

    // 27208 = 68 * ROW_PITCH + 8. The working pointer skips two rows (the slot for the rows
    // above the block) plus 8 elements of padding.
    let mut sumsq_mem: Align16<[int32_t; 27208]> = Align16([0; 27208]);
    // Plain pointer arithmetic: going through `&mut *ptr` would produce a reference to a single
    // element, while the assembly routines write whole rows through this pointer.
    let sumsq: *mut int32_t = sumsq_mem
        .0
        .as_mut_ptr()
        .offset((ROW_PITCH * 2 + 8) as isize);
    let a: *mut int32_t = sumsq;

    // 27216 = 68 * ROW_PITCH + 16. Same layout, with 16 elements of padding.
    let mut sum_mem: Align16<[int16_t; 27216]> = Align16([0; 27216]);
    let sum: *mut int16_t = sum_mem
        .0
        .as_mut_ptr()
        .offset((ROW_PITCH * 2 + 16) as isize);
    let b: *mut int16_t = sum;

    let have_top = edges as libc::c_uint & LR_HAVE_TOP as libc::c_int as libc::c_uint != 0;
    let have_bottom = edges as libc::c_uint & LR_HAVE_BOTTOM as libc::c_int as libc::c_uint != 0;

    // Horizontal sums for the block itself.
    dav1d_sgr_box5_h_16bpc_neon(sumsq, sum, left, src, stride, w, h, edges);

    if have_top {
        // Two rows from `lpf`, stored in the two scratch rows preceding the block.
        // No `left` column data is supplied for these rows.
        let above = (-2 * ROW_PITCH) as isize;
        dav1d_sgr_box5_h_16bpc_neon(
            sumsq.offset(above),
            sum.offset(above),
            core::ptr::null(),
            lpf,
            stride,
            w,
            2,
            edges,
        );
    }

    if have_bottom {
        // Two rows starting 6 pixel-strides into `lpf`, stored right after the block's rows.
        let below = (h * ROW_PITCH) as isize;
        dav1d_sgr_box5_h_16bpc_neon(
            sumsq.offset(below),
            sum.offset(below),
            core::ptr::null(),
            lpf.offset((6 * PXSTRIDE(stride)) as isize),
            stride,
            w,
            2,
            edges,
        );
    }

    dav1d_sgr_box5_v_neon(sumsq, sum, w, h, edges);
    // `a`/`b` alias `sumsq`/`sum`: the coefficients overwrite the box sums in place.
    dav1d_sgr_calc_ab2_neon(a, b, w, h, strength, bitdepth_max);
    dav1d_sgr_finish_filter2_16bpc_neon(tmp, src, stride, a, b, w, h);
}

#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
unsafe extern "C" fn sgr_filter_5x5_neon_erased(
p: *mut libc::c_void,
Expand Down Expand Up @@ -473,6 +381,9 @@ unsafe extern "C" fn sgr_filter_5x5_neon(
edges: LrEdgeFlags,
bitdepth_max: libc::c_int,
) {
use crate::include::common::bitdepth::BitDepth;
use crate::src::looprestoration::dav1d_sgr_filter2_neon;

let mut tmp: Align16<[int16_t; 24576]> = Align16([0; 24576]);
dav1d_sgr_filter2_neon(
tmp.0.as_mut_ptr(),
Expand All @@ -484,7 +395,7 @@ unsafe extern "C" fn sgr_filter_5x5_neon(
h,
(*params).sgr.s0 as libc::c_int,
edges,
bitdepth_max,
BitDepth16::from_c(bitdepth_max),
);
dav1d_sgr_weighted1_16bpc_neon(
dst,
Expand Down Expand Up @@ -538,6 +449,7 @@ unsafe extern "C" fn sgr_filter_mix_neon(
) {
use crate::include::common::bitdepth::BitDepth;
use crate::src::looprestoration::dav1d_sgr_filter1_neon;
use crate::src::looprestoration::dav1d_sgr_filter2_neon;

let mut tmp1: Align16<[int16_t; 24576]> = Align16([0; 24576]);
let mut tmp2: Align16<[int16_t; 24576]> = Align16([0; 24576]);
Expand All @@ -551,7 +463,7 @@ unsafe extern "C" fn sgr_filter_mix_neon(
h,
(*params).sgr.s0 as libc::c_int,
edges,
bitdepth_max,
BitDepth16::from_c(bitdepth_max),
);
dav1d_sgr_filter1_neon(
tmp2.0.as_mut_ptr(),
Expand Down
Loading

0 comments on commit 04ee7fd

Please sign in to comment.