Skip to content

Commit

Permalink
Add armv7 assembly for wiener_filter
Browse files Browse the repository at this point in the history
  • Loading branch information
thedataking committed Jul 13, 2023
1 parent 04ddf6d commit 87283d8
Show file tree
Hide file tree
Showing 2 changed files with 187 additions and 8 deletions.
101 changes: 97 additions & 4 deletions src/looprestoration_tmpl_16.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,31 @@ extern "C" {
edges: LrEdgeFlags,
);
}
#[cfg(all(feature = "asm", target_arch = "arm"))]
extern "C" {
fn dav1d_wiener_filter_h_16bpc_neon(
dst: *mut int16_t,
left: *const [pixel; 4],
src: *const pixel,
stride: ptrdiff_t,
fh: *const int16_t,
w: intptr_t,
h: libc::c_int,
edges: LrEdgeFlags,
bitdepth_max: libc::c_int,
);
fn dav1d_wiener_filter_v_16bpc_neon(
dst: *mut pixel,
stride: ptrdiff_t,
mid: *const int16_t,
w: libc::c_int,
h: libc::c_int,
fv: *const int16_t,
edges: LrEdgeFlags,
mid_stride: ptrdiff_t,
bitdepth_max: libc::c_int,
);
}

use crate::src::tables::dav1d_sgr_x_by_x;

Expand Down Expand Up @@ -1072,7 +1097,76 @@ unsafe extern "C" fn loop_restoration_dsp_init_x86(
}
}
}

#[cfg(all(feature = "asm", target_arch = "arm"))]
unsafe extern "C" fn wiener_filter_neon(
dst: *mut pixel,
stride: ptrdiff_t,
left: *const [pixel; 4],
mut lpf: *const pixel,
w: libc::c_int,
h: libc::c_int,
params: *const LooprestorationParams,
edges: LrEdgeFlags,
bitdepth_max: libc::c_int,
) {
let filter: *const [int16_t; 8] = (*params).filter.0.as_ptr();
let mut mid: Align16<[int16_t; 68 * 384]> = Align16([0; 68 * 384]);
let mut mid_stride: libc::c_int = w + 7 & !7;
dav1d_wiener_filter_h_16bpc_neon(
&mut *mid.0.as_mut_ptr().offset((2 * mid_stride) as isize),
left,
dst,
stride,
(*filter.offset(0)).as_ptr(),
w as intptr_t,
h,
edges,
bitdepth_max,
);
if edges & LR_HAVE_TOP != 0 {
dav1d_wiener_filter_h_16bpc_neon(
mid.0.as_mut_ptr(),
core::ptr::null(),
lpf,
stride,
(*filter.offset(0)).as_ptr(),
w as intptr_t,
2,
edges,
bitdepth_max,
);
}
if edges & LR_HAVE_BOTTOM != 0 {
dav1d_wiener_filter_h_16bpc_neon(
&mut *mid
.0
.as_mut_ptr()
.offset(((2 as libc::c_int + h) * mid_stride) as isize),
core::ptr::null(),
lpf.offset((6 * PXSTRIDE(stride)) as isize),
stride,
(*filter.offset(0)).as_ptr(),
w as intptr_t,
2,
edges,
bitdepth_max,
);
}
dav1d_wiener_filter_v_16bpc_neon(
dst,
stride,
&mut *mid
.0
.as_mut_ptr()
.offset((2 as libc::c_int * mid_stride) as isize),
w,
h,
(*filter.offset(1)).as_ptr(),
edges,
(mid_stride as usize * ::core::mem::size_of::<int16_t>()) as ptrdiff_t,
bitdepth_max,
);
}
#[cfg(feature = "asm")]
use crate::src::cpu::dav1d_get_cpu_flags;

Expand All @@ -1095,9 +1189,8 @@ unsafe extern "C" fn loop_restoration_dsp_init_arm(
(*c).wiener[0] = Some(dav1d_wiener_filter7_16bpc_neon);
(*c).wiener[1] = Some(dav1d_wiener_filter5_16bpc_neon);
} else {
// TODO(perl): enable assembly routines here
// (*c).wiener[0] = Some(dav1d_wiener_filter_neon);
// (*c).wiener[1] = Some(dav1d_wiener_filter_neon);
(*c).wiener[0] = Some(wiener_filter_neon);
(*c).wiener[1] = Some(wiener_filter_neon);
}
}

Expand Down
94 changes: 90 additions & 4 deletions src/looprestoration_tmpl_8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,30 @@ extern "C" {
);
}

#[cfg(all(feature = "asm", target_arch = "arm"))]
extern "C" {
fn dav1d_wiener_filter_h_8bpc_neon(
dst: *mut int16_t,
left: *const [pixel; 4],
src: *const pixel,
stride: ptrdiff_t,
fh: *const int16_t,
w: intptr_t,
h: libc::c_int,
edges: LrEdgeFlags,
);
fn dav1d_wiener_filter_v_8bpc_neon(
dst: *mut pixel,
stride: ptrdiff_t,
mid: *const int16_t,
w: libc::c_int,
h: libc::c_int,
fv: *const int16_t,
edges: LrEdgeFlags,
mid_stride: ptrdiff_t,
);
}

use crate::src::tables::dav1d_sgr_x_by_x;

pub type pixel = uint8_t;
Expand Down Expand Up @@ -1037,7 +1061,70 @@ unsafe extern "C" fn loop_restoration_dsp_init_x86(
(*c).sgr[2] = Some(dav1d_sgr_filter_mix_8bpc_avx512icl);
}
}

#[cfg(all(feature = "asm", target_arch = "arm"))]
unsafe extern "C" fn wiener_filter_neon(
dst: *mut pixel,
stride: ptrdiff_t,
left: *const [pixel; 4],
mut lpf: *const pixel,
w: libc::c_int,
h: libc::c_int,
params: *const LooprestorationParams,
edges: LrEdgeFlags,
) {
let filter: *const [int16_t; 8] = (*params).filter.0.as_ptr();
let mut mid: Align16<[int16_t; 68 * 384]> = Align16([0; 68 * 384]);
let mut mid_stride: libc::c_int = w + 7 & !7;
// Horizontal filter
dav1d_wiener_filter_h_8bpc_neon(
&mut *mid.0.as_mut_ptr().offset((2 * mid_stride) as isize),
left,
dst,
stride,
(*filter.offset(0)).as_ptr(),
w as isize,
h,
edges,
);
if edges & LR_HAVE_TOP != 0 {
dav1d_wiener_filter_h_8bpc_neon(
mid.0.as_mut_ptr(),
core::ptr::null(),
lpf,
stride,
(*filter.offset(0)).as_ptr(),
w as isize,
2,
edges,
);
}
if edges & LR_HAVE_BOTTOM != 0 {
dav1d_wiener_filter_h_8bpc_neon(
&mut *mid
.0
.as_mut_ptr()
.offset(((2 as libc::c_int + h) * mid_stride) as isize),
core::ptr::null(),
lpf.offset((6 * stride) as isize),
stride,
(*filter.offset(0)).as_ptr(),
w as isize,
2,
edges,
);
}
// Vertical filter
dav1d_wiener_filter_v_8bpc_neon(
dst,
stride,
&mut *mid.0.as_mut_ptr().offset((2 * mid_stride) as isize),
w,
h,
(*filter.offset(1)).as_ptr(),
edges,
(mid_stride as usize * ::core::mem::size_of::<int16_t>()) as ptrdiff_t,
);
}
#[cfg(feature = "asm")]
use crate::src::cpu::dav1d_get_cpu_flags;

Expand All @@ -1060,9 +1147,8 @@ unsafe extern "C" fn loop_restoration_dsp_init_arm(
(*c).wiener[0] = Some(dav1d_wiener_filter7_8bpc_neon);
(*c).wiener[1] = Some(dav1d_wiener_filter5_8bpc_neon);
} else {
// TODO(perl): enable assembly routines here
// (*c).wiener[0] = Some(dav1d_wiener_filter_neon);
// (*c).wiener[1] = Some(dav1d_wiener_filter_neon);
(*c).wiener[0] = Some(wiener_filter_neon);
(*c).wiener[1] = Some(wiener_filter_neon);
}
}

Expand Down

0 comments on commit 87283d8

Please sign in to comment.