From f8c27fe30748c752a3c4fb678bd6a474d98e9886 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 3 Nov 2023 01:08:55 -0700 Subject: [PATCH 01/49] `src/ipred.rs`: Combine `src/ipred_tmpl.rs` with `src/ipred.rs`. --- lib.rs | 1 - src/ipred.rs | 38 ++++++++++++++++++++++++++++++++++++++ src/ipred_tmpl.rs | 39 --------------------------------------- src/ipred_tmpl_16.rs | 2 +- src/ipred_tmpl_8.rs | 2 +- 5 files changed, 40 insertions(+), 42 deletions(-) delete mode 100644 src/ipred_tmpl.rs diff --git a/lib.rs b/lib.rs index f399af9b6..12c889498 100644 --- a/lib.rs +++ b/lib.rs @@ -59,7 +59,6 @@ pub mod src { mod intra_edge; mod ipred; mod ipred_prepare; - mod ipred_tmpl; #[cfg(feature = "bitdepth_16")] mod ipred_tmpl_16; #[cfg(feature = "bitdepth_8")] diff --git a/src/ipred.rs b/src/ipred.rs index a35dffd73..babdf50c7 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -1,4 +1,5 @@ use crate::include::common::bitdepth::DynPixel; +use cfg_if::cfg_if; use libc::ptrdiff_t; use std::ffi::c_int; @@ -155,3 +156,40 @@ extern "C" { decl_fns!(pal_pred, pal_pred); } + +pub unsafe fn filter_fn( + flt_ptr: *const i8, + p0: c_int, + p1: c_int, + p2: c_int, + p3: c_int, + p4: c_int, + p5: c_int, + p6: c_int, +) -> c_int { + if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { + *flt_ptr.offset(0) as c_int * p0 + + *flt_ptr.offset(1) as c_int * p1 + + *flt_ptr.offset(16) as c_int * p2 + + *flt_ptr.offset(17) as c_int * p3 + + *flt_ptr.offset(32) as c_int * p4 + + *flt_ptr.offset(33) as c_int * p5 + + *flt_ptr.offset(48) as c_int * p6 + } else { + *flt_ptr.offset(0) as c_int * p0 + + *flt_ptr.offset(8) as c_int * p1 + + *flt_ptr.offset(16) as c_int * p2 + + *flt_ptr.offset(24) as c_int * p3 + + *flt_ptr.offset(32) as c_int * p4 + + *flt_ptr.offset(40) as c_int * p5 + + *flt_ptr.offset(48) as c_int * p6 + } +} + +cfg_if! 
{ + if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + pub const FLT_INCR: isize = 2; + } else { + pub const FLT_INCR: isize = 1; + } +} diff --git a/src/ipred_tmpl.rs b/src/ipred_tmpl.rs deleted file mode 100644 index edde28279..000000000 --- a/src/ipred_tmpl.rs +++ /dev/null @@ -1,39 +0,0 @@ -use cfg_if::cfg_if; -use std::ffi::c_int; - -pub unsafe fn filter_fn( - flt_ptr: *const i8, - p0: c_int, - p1: c_int, - p2: c_int, - p3: c_int, - p4: c_int, - p5: c_int, - p6: c_int, -) -> c_int { - if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { - *flt_ptr.offset(0) as c_int * p0 - + *flt_ptr.offset(1) as c_int * p1 - + *flt_ptr.offset(16) as c_int * p2 - + *flt_ptr.offset(17) as c_int * p3 - + *flt_ptr.offset(32) as c_int * p4 - + *flt_ptr.offset(33) as c_int * p5 - + *flt_ptr.offset(48) as c_int * p6 - } else { - *flt_ptr.offset(0) as c_int * p0 - + *flt_ptr.offset(8) as c_int * p1 - + *flt_ptr.offset(16) as c_int * p2 - + *flt_ptr.offset(24) as c_int * p3 - + *flt_ptr.offset(32) as c_int * p4 - + *flt_ptr.offset(40) as c_int * p5 - + *flt_ptr.offset(48) as c_int * p6 - } -} - -cfg_if! 
{ - if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - pub const FLT_INCR: isize = 2; - } else { - pub const FLT_INCR: isize = 1; - } -} diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 5747cf7ef..f5b298881 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -1279,7 +1279,7 @@ unsafe fn ipred_filter_rust( _max_height: c_int, bitdepth_max: c_int, ) { - use crate::src::ipred_tmpl::{filter_fn, FLT_INCR}; + use crate::src::ipred::{filter_fn, FLT_INCR}; filt_idx &= 511 as c_int; if !(filt_idx < 5) { diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index afec21e5e..4607f0e9d 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -1197,7 +1197,7 @@ unsafe fn ipred_filter_rust( _max_width: c_int, _max_height: c_int, ) { - use crate::src::ipred_tmpl::{filter_fn, FLT_INCR}; + use crate::src::ipred::{filter_fn, FLT_INCR}; filt_idx &= 511 as c_int; if !(filt_idx < 5) { From 16151f260d192512c4ea52adf193e2e86f87f7c5 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 3 Nov 2023 01:17:43 -0700 Subject: [PATCH 02/49] `trait DefaultValue`: `impl` for `Option`. --- src/enum_map.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/enum_map.rs b/src/enum_map.rs index 0cc6d96be..64919b92e 100644 --- a/src/enum_map.rs +++ b/src/enum_map.rs @@ -15,6 +15,10 @@ pub trait DefaultValue { const DEFAULT: Self; } +impl DefaultValue for Option { + const DEFAULT: Self = None; +} + /// A map from an `enum` key `K` to `V`s. /// `N` is the number of possible `enum` values. pub struct EnumMap From 3904463df4f3cb3cffe95c73f46b5d8b5f944fc8 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 3 Nov 2023 01:18:07 -0700 Subject: [PATCH 03/49] `src/ipred.rs`: Re-order items to order in C. 
--- src/ipred.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index babdf50c7..f89b2c8df 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -3,11 +3,6 @@ use cfg_if::cfg_if; use libc::ptrdiff_t; use std::ffi::c_int; -#[inline] -pub unsafe fn get_upsample(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { - return (angle < 40 && wh <= 16 >> is_sm) as c_int; -} - pub type angular_ipred_fn = unsafe extern "C" fn( *mut DynPixel, ptrdiff_t, @@ -157,6 +152,11 @@ extern "C" { decl_fns!(pal_pred, pal_pred); } +#[inline] +pub unsafe fn get_upsample(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { + return (angle < 40 && wh <= 16 >> is_sm) as c_int; +} + pub unsafe fn filter_fn( flt_ptr: *const i8, p0: c_int, From 4b4044f7e969bdd4de84d25963a91d4082e7de03 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 3 Nov 2023 01:21:25 -0700 Subject: [PATCH 04/49] `src/ipred.rs`: Mark applicable items as temporarily `pub` until the rest of the `mod` is deduplicated. --- src/ipred.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index f89b2c8df..ea16312d6 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -152,12 +152,14 @@ extern "C" { decl_fns!(pal_pred, pal_pred); } +// TODO(kkysen) Temporarily pub until mod is deduplicated #[inline] -pub unsafe fn get_upsample(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { +pub(crate) unsafe fn get_upsample(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { return (angle < 40 && wh <= 16 >> is_sm) as c_int; } -pub unsafe fn filter_fn( +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe fn filter_fn( flt_ptr: *const i8, p0: c_int, p1: c_int, @@ -188,8 +190,10 @@ pub unsafe fn filter_fn( cfg_if! 
{ if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - pub const FLT_INCR: isize = 2; + // TODO(kkysen) Temporarily pub until mod is deduplicated + pub(crate) const FLT_INCR: isize = 2; } else { - pub const FLT_INCR: isize = 1; + // TODO(kkysen) Temporarily pub until mod is deduplicated + pub(crate) const FLT_INCR: isize = 1; } } From a44ec08a458569d5cae1d53996db9c7e9f2af3d4 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 3 Nov 2023 01:22:32 -0700 Subject: [PATCH 05/49] `fn get_filter_strength`: Deduplicate. --- src/ipred.rs | 57 ++++++++++++++++++++++++++++++++++++++++++++ src/ipred_tmpl_16.rs | 57 +------------------------------------------- src/ipred_tmpl_8.rs | 57 +------------------------------------------- 3 files changed, 59 insertions(+), 112 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index ea16312d6..44839a0b4 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -152,6 +152,63 @@ extern "C" { decl_fns!(pal_pred, pal_pred); } +// TODO(kkysen) Temporarily pub until mod is deduplicated +#[inline(never)] +pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { + if is_sm != 0 { + if wh <= 8 { + if angle >= 64 { + return 2 as c_int; + } + if angle >= 40 { + return 1 as c_int; + } + } else if wh <= 16 { + if angle >= 48 { + return 2 as c_int; + } + if angle >= 20 { + return 1 as c_int; + } + } else if wh <= 24 { + if angle >= 4 { + return 3 as c_int; + } + } else { + return 3 as c_int; + } + } else if wh <= 8 { + if angle >= 56 { + return 1 as c_int; + } + } else if wh <= 16 { + if angle >= 40 { + return 1 as c_int; + } + } else if wh <= 24 { + if angle >= 32 { + return 3 as c_int; + } + if angle >= 16 { + return 2 as c_int; + } + if angle >= 8 { + return 1 as c_int; + } + } else if wh <= 32 { + if angle >= 32 { + return 3 as c_int; + } + if angle >= 4 { + return 2 as c_int; + } + return 1 as c_int; + } else { + return 3 as c_int; + } + return 0 as c_int; +} + // TODO(kkysen) Temporarily pub until mod is 
deduplicated #[inline] pub(crate) unsafe fn get_upsample(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index f5b298881..7ab5c8b71 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -3,6 +3,7 @@ use crate::include::common::bitdepth::DynPixel; use crate::include::common::intops::apply_sign; use crate::include::common::intops::iclip; use crate::include::dav1d::headers::Rav1dPixelLayout; +use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; @@ -729,62 +730,6 @@ unsafe fn ipred_smooth_h_rust( } } -#[inline(never)] -unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { - if is_sm != 0 { - if wh <= 8 { - if angle >= 64 { - return 2 as c_int; - } - if angle >= 40 { - return 1 as c_int; - } - } else if wh <= 16 { - if angle >= 48 { - return 2 as c_int; - } - if angle >= 20 { - return 1 as c_int; - } - } else if wh <= 24 { - if angle >= 4 { - return 3 as c_int; - } - } else { - return 3 as c_int; - } - } else if wh <= 8 { - if angle >= 56 { - return 1 as c_int; - } - } else if wh <= 16 { - if angle >= 40 { - return 1 as c_int; - } - } else if wh <= 24 { - if angle >= 32 { - return 3 as c_int; - } - if angle >= 16 { - return 2 as c_int; - } - if angle >= 8 { - return 1 as c_int; - } - } else if wh <= 32 { - if angle >= 32 { - return 3 as c_int; - } - if angle >= 4 { - return 2 as c_int; - } - return 1 as c_int; - } else { - return 3 as c_int; - } - return 0 as c_int; -} - #[inline(never)] unsafe fn filter_edge( out: *mut pixel, diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 4607f0e9d..a87c85c5b 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -4,6 +4,7 @@ use crate::include::common::intops::apply_sign; use crate::include::common::intops::iclip; use crate::include::common::intops::iclip_u8; use 
crate::include::dav1d::headers::Rav1dPixelLayout; +use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; @@ -673,62 +674,6 @@ unsafe fn ipred_smooth_h_rust( } } -#[inline(never)] -unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { - if is_sm != 0 { - if wh <= 8 { - if angle >= 64 { - return 2 as c_int; - } - if angle >= 40 { - return 1 as c_int; - } - } else if wh <= 16 { - if angle >= 48 { - return 2 as c_int; - } - if angle >= 20 { - return 1 as c_int; - } - } else if wh <= 24 { - if angle >= 4 { - return 3 as c_int; - } - } else { - return 3 as c_int; - } - } else if wh <= 8 { - if angle >= 56 { - return 1 as c_int; - } - } else if wh <= 16 { - if angle >= 40 { - return 1 as c_int; - } - } else if wh <= 24 { - if angle >= 32 { - return 3 as c_int; - } - if angle >= 16 { - return 2 as c_int; - } - if angle >= 8 { - return 1 as c_int; - } - } else if wh <= 32 { - if angle >= 32 { - return 3 as c_int; - } - if angle >= 4 { - return 2 as c_int; - } - return 1 as c_int; - } else { - return 3 as c_int; - } - return 0 as c_int; -} - #[inline(never)] unsafe fn filter_edge( out: *mut pixel, From ca1d6867bfec32b722debcbb233055cc41d8ac24 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 3 Nov 2023 01:38:08 -0700 Subject: [PATCH 06/49] `fn splat_dc`: Deduplicate w/ generics. 
--- src/ipred.rs | 71 ++++++++++++++++++++++++++++++++++++++++++++ src/ipred_tmpl_16.rs | 55 +++++++++++----------------------- src/ipred_tmpl_8.rs | 53 +++++++-------------------------- 3 files changed, 98 insertions(+), 81 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 44839a0b4..94d02ccdd 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -1,7 +1,13 @@ +use crate::include::common::bitdepth::AsPrimitive; +use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::DynPixel; +use crate::include::common::bitdepth::BPC; use cfg_if::cfg_if; use libc::ptrdiff_t; use std::ffi::c_int; +use std::ffi::c_uint; +use std::ffi::c_ulong; +use std::ffi::c_ulonglong; pub type angular_ipred_fn = unsafe extern "C" fn( *mut DynPixel, @@ -152,6 +158,71 @@ extern "C" { decl_fns!(pal_pred, pal_pred); } +// TODO(kkysen) Temporarily pub until mod is deduplicated +#[inline(never)] +pub(crate) unsafe fn splat_dc( + mut dst: *mut BD::Pixel, + stride: ptrdiff_t, + width: c_int, + height: c_int, + dc: c_int, + bd: BD, +) { + match BD::BPC { + BPC::BPC8 => { + if !(dc <= 0xff as c_int) { + unreachable!(); + } + if width > 4 { + let dcN: u64 = + (dc as c_ulonglong).wrapping_mul(0x101010101010101 as c_ulonglong) as u64; + let mut y = 0; + while y < height { + let mut x = 0; + while x < width { + *(&mut *dst.offset(x as isize) as *mut BD::Pixel as *mut u64) = dcN; + x = (x as c_ulong).wrapping_add(::core::mem::size_of::() as c_ulong) + as c_int as c_int; + } + dst = dst.offset(stride as isize); + y += 1; + } + } else { + let dcN_0: c_uint = (dc as c_uint).wrapping_mul(0x1010101 as c_uint); + let mut y_0 = 0; + while y_0 < height { + let mut x_0 = 0; + while x_0 < width { + *(&mut *dst.offset(x_0 as isize) as *mut BD::Pixel as *mut c_uint) = dcN_0; + x_0 = (x_0 as c_ulong) + .wrapping_add(::core::mem::size_of::() as c_ulong) + as c_int as c_int; + } + dst = dst.offset(stride as isize); + y_0 += 1; + } + }; + } + BPC::BPC16 => { + if !(dc <= 
bd.bitdepth_max().as_::()) { + unreachable!(); + } + let dcN: u64 = (dc as c_ulonglong).wrapping_mul(0x1000100010001 as c_ulonglong) as u64; + let mut y = 0; + while y < height { + let mut x = 0; + while x < width { + *(&mut *dst.offset(x as isize) as *mut BD::Pixel as *mut u64) = dcN; + x = (x as c_ulong).wrapping_add(::core::mem::size_of::() as c_ulong >> 1) + as c_int as c_int; + } + dst = dst.offset(BD::pxstride(stride as usize) as isize); + y += 1; + } + } + } +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 7ab5c8b71..b32909287 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -1,10 +1,13 @@ use crate::include::common::attributes::ctz; +use crate::include::common::bitdepth::BitDepth; +use crate::include::common::bitdepth::BitDepth16; use crate::include::common::bitdepth::DynPixel; use crate::include::common::intops::apply_sign; use crate::include::common::intops::iclip; use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; +use crate::src::ipred::splat_dc; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; use crate::src::levels::DC_PRED; @@ -28,8 +31,6 @@ use libc::ptrdiff_t; use std::cmp; use std::ffi::c_int; use std::ffi::c_uint; -use std::ffi::c_ulong; -use std::ffi::c_ulonglong; use std::ffi::c_void; #[cfg(feature = "asm")] @@ -149,32 +150,6 @@ unsafe fn pixel_set(dst: *mut pixel, val: c_int, num: c_int) { } } -#[inline(never)] -unsafe fn splat_dc( - mut dst: *mut pixel, - stride: ptrdiff_t, - width: c_int, - height: c_int, - dc: c_int, - bitdepth_max: c_int, -) { - if !(dc <= bitdepth_max) { - unreachable!(); - } - let dcN: u64 = (dc as c_ulonglong).wrapping_mul(0x1000100010001 as c_ulonglong) as u64; - let mut y = 0; - while y < height { - let 
mut x = 0; - while x < width { - *(&mut *dst.offset(x as isize) as *mut pixel as *mut u64) = dcN; - x = (x as c_ulong).wrapping_add(::core::mem::size_of::() as c_ulong >> 1) as c_int - as c_int; - } - dst = dst.offset(PXSTRIDE(stride) as isize); - y += 1; - } -} - #[inline(never)] unsafe fn cfl_pred( mut dst: *mut pixel, @@ -225,13 +200,13 @@ unsafe extern "C" fn ipred_dc_top_c_erased( _max_height: c_int, bitdepth_max: c_int, ) { - splat_dc( + splat_dc::( dst.cast(), stride, width, height, dc_gen_top(topleft.cast(), width) as c_int, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); } @@ -278,13 +253,13 @@ unsafe extern "C" fn ipred_dc_left_c_erased( _max_height: c_int, bitdepth_max: c_int, ) { - splat_dc( + splat_dc::( dst.cast(), stride, width, height, dc_gen_left(topleft.cast(), height) as c_int, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); } @@ -348,13 +323,13 @@ unsafe extern "C" fn ipred_dc_c_erased( _max_height: c_int, bitdepth_max: c_int, ) { - splat_dc( + splat_dc::( dst.cast(), stride, width, height, dc_gen(topleft.cast(), width, height) as c_int, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); } @@ -393,7 +368,14 @@ unsafe extern "C" fn ipred_dc_128_c_erased( bitdepth_max: c_int, ) { let dc = bitdepth_max + 1 >> 1; - splat_dc(dst.cast(), stride, width, height, dc, bitdepth_max); + splat_dc::( + dst.cast(), + stride, + width, + height, + dc, + BitDepth16::from_c(bitdepth_max), + ); } unsafe extern "C" fn ipred_cfl_128_c_erased( @@ -1746,9 +1728,6 @@ unsafe fn ipred_z2_neon( max_height: c_int, bitdepth_max: c_int, ) { - use crate::include::common::bitdepth::BitDepth; - use crate::include::common::bitdepth::BitDepth16; - let is_sm = angle >> 9 & 0x1 as c_int; let enable_intra_edge_filter = angle >> 10; angle &= 511 as c_int; diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index a87c85c5b..376581a3d 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -1,4 +1,6 @@ use crate::include::common::attributes::ctz; +use 
crate::include::common::bitdepth::BitDepth; +use crate::include::common::bitdepth::BitDepth8; use crate::include::common::bitdepth::DynPixel; use crate::include::common::intops::apply_sign; use crate::include::common::intops::iclip; @@ -6,6 +8,7 @@ use crate::include::common::intops::iclip_u8; use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; +use crate::src::ipred::splat_dc; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; use crate::src::levels::DC_PRED; @@ -30,8 +33,6 @@ use libc::ptrdiff_t; use std::cmp; use std::ffi::c_int; use std::ffi::c_uint; -use std::ffi::c_ulong; -use std::ffi::c_ulonglong; use std::ffi::c_void; #[cfg(feature = "asm")] @@ -128,40 +129,6 @@ extern "C" { pub type pixel = u8; -#[inline(never)] -unsafe fn splat_dc(mut dst: *mut pixel, stride: ptrdiff_t, width: c_int, height: c_int, dc: c_int) { - if !(dc <= 0xff as c_int) { - unreachable!(); - } - if width > 4 { - let dcN: u64 = (dc as c_ulonglong).wrapping_mul(0x101010101010101 as c_ulonglong) as u64; - let mut y = 0; - while y < height { - let mut x = 0; - while x < width { - *(&mut *dst.offset(x as isize) as *mut pixel as *mut u64) = dcN; - x = (x as c_ulong).wrapping_add(::core::mem::size_of::() as c_ulong) as c_int - as c_int; - } - dst = dst.offset(stride as isize); - y += 1; - } - } else { - let dcN_0: c_uint = (dc as c_uint).wrapping_mul(0x1010101 as c_uint); - let mut y_0 = 0; - while y_0 < height { - let mut x_0 = 0; - while x_0 < width { - *(&mut *dst.offset(x_0 as isize) as *mut pixel as *mut c_uint) = dcN_0; - x_0 = (x_0 as c_ulong).wrapping_add(::core::mem::size_of::() as c_ulong) - as c_int as c_int; - } - dst = dst.offset(stride as isize); - y_0 += 1; - } - }; -} - #[inline(never)] unsafe fn cfl_pred( mut dst: *mut pixel, @@ -208,12 +175,13 @@ unsafe extern "C" fn ipred_dc_top_c_erased( _max_height: c_int, _bitdepth_max: c_int, ) { - splat_dc( + 
splat_dc::( dst.cast(), stride, width, height, dc_gen_top(topleft.cast(), width) as c_int, + BitDepth8::new(()), ); } @@ -259,12 +227,13 @@ unsafe extern "C" fn ipred_dc_left_c_erased( _max_height: c_int, _bitdepth_max: c_int, ) { - splat_dc( + splat_dc::( dst.cast(), stride, width, height, dc_gen_left(topleft.cast(), height) as c_int, + BitDepth8::new(()), ); } @@ -319,12 +288,13 @@ unsafe extern "C" fn ipred_dc_c_erased( _max_height: c_int, _bitdepth_max: c_int, ) { - splat_dc( + splat_dc::( dst.cast(), stride, width, height, dc_gen(topleft.cast(), width, height) as c_int, + BitDepth8::new(()), ); } @@ -354,7 +324,7 @@ unsafe extern "C" fn ipred_dc_128_c_erased( _bitdepth_max: c_int, ) { let dc = 128; - splat_dc(dst.cast(), stride, width, height, dc); + splat_dc::(dst.cast(), stride, width, height, dc, BitDepth8::new(())); } unsafe extern "C" fn ipred_cfl_128_c_erased( @@ -1663,9 +1633,6 @@ unsafe fn ipred_z2_neon( max_width: c_int, max_height: c_int, ) { - use crate::include::common::bitdepth::BitDepth; - use crate::include::common::bitdepth::BitDepth8; - let is_sm = angle >> 9 & 0x1 as c_int; let enable_intra_edge_filter = angle >> 10; angle &= 511 as c_int; From 91e022d8131c543613bfbb679a1b364b16e4244f Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 3 Nov 2023 01:58:03 -0700 Subject: [PATCH 07/49] `fn cfl_pred`: Deduplicate w/ generics. 
--- src/ipred.rs | 27 +++++++++++++++++++ src/ipred_tmpl_16.rs | 47 +++++++-------------------------- src/ipred_tmpl_8.rs | 63 +++++++++++++++++++++++--------------------- 3 files changed, 69 insertions(+), 68 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 94d02ccdd..331a3f1a6 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -2,6 +2,7 @@ use crate::include::common::bitdepth::AsPrimitive; use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::DynPixel; use crate::include::common::bitdepth::BPC; +use crate::include::common::intops::apply_sign; use cfg_if::cfg_if; use libc::ptrdiff_t; use std::ffi::c_int; @@ -223,6 +224,32 @@ pub(crate) unsafe fn splat_dc( } } +// TODO(kkysen) Temporarily pub until mod is deduplicated +#[inline(never)] +pub(crate) unsafe fn cfl_pred( + mut dst: *mut BD::Pixel, + stride: ptrdiff_t, + width: c_int, + height: c_int, + dc: c_int, + mut ac: *const i16, + alpha: c_int, + bd: BD, +) { + let mut y = 0; + while y < height { + let mut x = 0; + while x < width { + let diff = alpha * *ac.offset(x as isize) as c_int; + *dst.offset(x as isize) = bd.iclip_pixel(dc + apply_sign(diff.abs() + 32 >> 6, diff)); + x += 1; + } + ac = ac.offset(width as isize); + dst = dst.offset(BD::pxstride(stride as usize) as isize); + y += 1; + } +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index b32909287..ae0a103ac 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -2,9 +2,9 @@ use crate::include::common::attributes::ctz; use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::BitDepth16; use crate::include::common::bitdepth::DynPixel; -use crate::include::common::intops::apply_sign; use crate::include::common::intops::iclip; use crate::include::dav1d::headers::Rav1dPixelLayout; +use 
crate::src::ipred::cfl_pred; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; use crate::src::ipred::splat_dc; @@ -150,35 +150,6 @@ unsafe fn pixel_set(dst: *mut pixel, val: c_int, num: c_int) { } } -#[inline(never)] -unsafe fn cfl_pred( - mut dst: *mut pixel, - stride: ptrdiff_t, - width: c_int, - height: c_int, - dc: c_int, - mut ac: *const i16, - alpha: c_int, - bitdepth_max: c_int, -) { - let mut y = 0; - while y < height { - let mut x = 0; - while x < width { - let diff = alpha * *ac.offset(x as isize) as c_int; - *dst.offset(x as isize) = iclip( - dc + apply_sign(diff.abs() + 32 >> 6, diff), - 0 as c_int, - bitdepth_max, - ) as pixel; - x += 1; - } - ac = ac.offset(width as isize); - dst = dst.offset(PXSTRIDE(stride) as isize); - y += 1; - } -} - unsafe fn dc_gen_top(topleft: *const pixel, width: c_int) -> c_uint { let mut dc: c_uint = (width >> 1) as c_uint; let mut i = 0; @@ -220,7 +191,7 @@ unsafe extern "C" fn ipred_cfl_top_c_erased( alpha: c_int, bitdepth_max: c_int, ) { - cfl_pred( + cfl_pred::( dst.cast(), stride, width, @@ -228,7 +199,7 @@ unsafe extern "C" fn ipred_cfl_top_c_erased( dc_gen_top(topleft.cast(), width) as c_int, ac, alpha, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); } @@ -274,7 +245,7 @@ unsafe extern "C" fn ipred_cfl_left_c_erased( bitdepth_max: c_int, ) { let dc: c_uint = dc_gen_left(topleft.cast(), height); - cfl_pred( + cfl_pred::( dst.cast(), stride, width, @@ -282,7 +253,7 @@ unsafe extern "C" fn ipred_cfl_left_c_erased( dc as c_int, ac, alpha, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); } @@ -344,7 +315,7 @@ unsafe extern "C" fn ipred_cfl_c_erased( bitdepth_max: c_int, ) { let dc: c_uint = dc_gen(topleft.cast(), width, height); - cfl_pred( + cfl_pred::( dst.cast(), stride, width, @@ -352,7 +323,7 @@ unsafe extern "C" fn ipred_cfl_c_erased( dc as c_int, ac, alpha, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); } @@ -389,7 +360,7 @@ unsafe extern "C" fn 
ipred_cfl_128_c_erased( bitdepth_max: c_int, ) { let dc = bitdepth_max + 1 >> 1; - cfl_pred( + cfl_pred::( dst.cast(), stride, width, @@ -397,7 +368,7 @@ unsafe extern "C" fn ipred_cfl_128_c_erased( dc, ac, alpha, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); } diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 376581a3d..ebd78d82c 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -2,10 +2,10 @@ use crate::include::common::attributes::ctz; use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::BitDepth8; use crate::include::common::bitdepth::DynPixel; -use crate::include::common::intops::apply_sign; use crate::include::common::intops::iclip; use crate::include::common::intops::iclip_u8; use crate::include::dav1d::headers::Rav1dPixelLayout; +use crate::src::ipred::cfl_pred; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; use crate::src::ipred::splat_dc; @@ -129,31 +129,6 @@ extern "C" { pub type pixel = u8; -#[inline(never)] -unsafe fn cfl_pred( - mut dst: *mut pixel, - stride: ptrdiff_t, - width: c_int, - height: c_int, - dc: c_int, - mut ac: *const i16, - alpha: c_int, -) { - let mut y = 0; - while y < height { - let mut x = 0; - while x < width { - let diff = alpha * *ac.offset(x as isize) as c_int; - *dst.offset(x as isize) = - iclip_u8(dc + apply_sign(diff.abs() + 32 >> 6, diff)) as pixel; - x += 1; - } - ac = ac.offset(width as isize); - dst = dst.offset(stride as isize); - y += 1; - } -} - unsafe fn dc_gen_top(topleft: *const pixel, width: c_int) -> c_uint { let mut dc: c_uint = (width >> 1) as c_uint; let mut i = 0; @@ -195,7 +170,7 @@ unsafe extern "C" fn ipred_cfl_top_c_erased( alpha: c_int, _bitdepth_max: c_int, ) { - cfl_pred( + cfl_pred::( dst.cast(), stride, width, @@ -203,6 +178,7 @@ unsafe extern "C" fn ipred_cfl_top_c_erased( dc_gen_top(topleft.cast(), width) as c_int, ac, alpha, + BitDepth8::new(()), ); } @@ -248,7 +224,16 @@ unsafe extern "C" fn 
ipred_cfl_left_c_erased( _bitdepth_max: c_int, ) { let dc: c_uint = dc_gen_left(topleft.cast(), height); - cfl_pred(dst.cast(), stride, width, height, dc as c_int, ac, alpha); + cfl_pred::( + dst.cast(), + stride, + width, + height, + dc as c_int, + ac, + alpha, + BitDepth8::new(()), + ); } unsafe fn dc_gen(topleft: *const pixel, width: c_int, height: c_int) -> c_uint { @@ -309,7 +294,16 @@ unsafe extern "C" fn ipred_cfl_c_erased( _bitdepth_max: c_int, ) { let dc: c_uint = dc_gen(topleft.cast(), width, height); - cfl_pred(dst.cast(), stride, width, height, dc as c_int, ac, alpha); + cfl_pred::( + dst.cast(), + stride, + width, + height, + dc as c_int, + ac, + alpha, + BitDepth8::new(()), + ); } unsafe extern "C" fn ipred_dc_128_c_erased( @@ -338,7 +332,16 @@ unsafe extern "C" fn ipred_cfl_128_c_erased( _bitdepth_max: c_int, ) { let dc = 128; - cfl_pred(dst.cast(), stride, width, height, dc, ac, alpha); + cfl_pred::( + dst.cast(), + stride, + width, + height, + dc, + ac, + alpha, + BitDepth8::new(()), + ); } unsafe extern "C" fn ipred_v_c_erased( From 9e34aff8af624e4882dea65129b41c2d1349707d Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 3 Nov 2023 02:01:24 -0700 Subject: [PATCH 08/49] `fn dc_gen_top`: Deduplicate w/ generics. 
--- src/ipred.rs | 12 ++++++++++++ src/ipred_tmpl_16.rs | 15 +++------------ src/ipred_tmpl_8.rs | 15 +++------------ 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 331a3f1a6..3764785ae 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -1,3 +1,4 @@ +use crate::include::common::attributes::ctz; use crate::include::common::bitdepth::AsPrimitive; use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::DynPixel; @@ -250,6 +251,17 @@ pub(crate) unsafe fn cfl_pred( } } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe fn dc_gen_top(topleft: *const BD::Pixel, width: c_int) -> c_uint { + let mut dc: c_uint = (width >> 1) as c_uint; + let mut i = 0; + while i < width { + dc = dc.wrapping_add((*topleft.offset((1 + i) as isize)).as_::()); + i += 1; + } + return dc >> ctz(width as c_uint); +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index ae0a103ac..3a514dadf 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -5,6 +5,7 @@ use crate::include::common::bitdepth::DynPixel; use crate::include::common::intops::iclip; use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::src::ipred::cfl_pred; +use crate::src::ipred::dc_gen_top; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; use crate::src::ipred::splat_dc; @@ -150,16 +151,6 @@ unsafe fn pixel_set(dst: *mut pixel, val: c_int, num: c_int) { } } -unsafe fn dc_gen_top(topleft: *const pixel, width: c_int) -> c_uint { - let mut dc: c_uint = (width >> 1) as c_uint; - let mut i = 0; - while i < width { - dc = dc.wrapping_add(*topleft.offset((1 + i) as isize) as c_uint); - i += 1; - } - return dc >> ctz(width as c_uint); -} - unsafe extern "C" fn ipred_dc_top_c_erased( dst: *mut DynPixel, stride: 
ptrdiff_t, @@ -176,7 +167,7 @@ unsafe extern "C" fn ipred_dc_top_c_erased( stride, width, height, - dc_gen_top(topleft.cast(), width) as c_int, + dc_gen_top::(topleft.cast(), width) as c_int, BitDepth16::from_c(bitdepth_max), ); } @@ -196,7 +187,7 @@ unsafe extern "C" fn ipred_cfl_top_c_erased( stride, width, height, - dc_gen_top(topleft.cast(), width) as c_int, + dc_gen_top::(topleft.cast(), width) as c_int, ac, alpha, BitDepth16::from_c(bitdepth_max), diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index ebd78d82c..a23e55dbc 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -6,6 +6,7 @@ use crate::include::common::intops::iclip; use crate::include::common::intops::iclip_u8; use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::src::ipred::cfl_pred; +use crate::src::ipred::dc_gen_top; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; use crate::src::ipred::splat_dc; @@ -129,16 +130,6 @@ extern "C" { pub type pixel = u8; -unsafe fn dc_gen_top(topleft: *const pixel, width: c_int) -> c_uint { - let mut dc: c_uint = (width >> 1) as c_uint; - let mut i = 0; - while i < width { - dc = dc.wrapping_add(*topleft.offset((1 + i) as isize) as c_uint); - i += 1; - } - return dc >> ctz(width as c_uint); -} - unsafe extern "C" fn ipred_dc_top_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -155,7 +146,7 @@ unsafe extern "C" fn ipred_dc_top_c_erased( stride, width, height, - dc_gen_top(topleft.cast(), width) as c_int, + dc_gen_top::(topleft.cast(), width) as c_int, BitDepth8::new(()), ); } @@ -175,7 +166,7 @@ unsafe extern "C" fn ipred_cfl_top_c_erased( stride, width, height, - dc_gen_top(topleft.cast(), width) as c_int, + dc_gen_top::(topleft.cast(), width) as c_int, ac, alpha, BitDepth8::new(()), From 9f1fd0e8608b9f1cdbe13138b62c076252d23cdc Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 3 Nov 2023 20:41:17 -0700 Subject: [PATCH 09/49] `fn ipred_dc_top_c_erased`: Deduplicate w/ generics. 
--- src/ipred.rs | 22 ++++++++++++++++++++++ src/ipred_tmpl_16.rs | 24 ++---------------------- src/ipred_tmpl_8.rs | 24 ++---------------------- 3 files changed, 26 insertions(+), 44 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 3764785ae..b1b39287c 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -262,6 +262,28 @@ pub(crate) unsafe fn dc_gen_top(topleft: *const BD::Pixel, width: return dc >> ctz(width as c_uint); } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe extern "C" fn ipred_dc_top_c_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + topleft: *const DynPixel, + width: c_int, + height: c_int, + _a: c_int, + _max_width: c_int, + _max_height: c_int, + bitdepth_max: c_int, +) { + splat_dc::( + dst.cast(), + stride, + width, + height, + dc_gen_top::(topleft.cast(), width) as c_int, + BD::from_c(bitdepth_max), + ); +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 3a514dadf..388fde4e5 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -8,6 +8,7 @@ use crate::src::ipred::cfl_pred; use crate::src::ipred::dc_gen_top; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; +use crate::src::ipred::ipred_dc_top_c_erased; use crate::src::ipred::splat_dc; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; @@ -151,27 +152,6 @@ unsafe fn pixel_set(dst: *mut pixel, val: c_int, num: c_int) { } } -unsafe extern "C" fn ipred_dc_top_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - _a: c_int, - _max_width: c_int, - _max_height: c_int, - bitdepth_max: c_int, -) { - splat_dc::( - dst.cast(), - stride, - width, - height, - dc_gen_top::(topleft.cast(), width) as c_int, - BitDepth16::from_c(bitdepth_max), - ); -} - 
unsafe extern "C" fn ipred_cfl_top_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -1970,7 +1950,7 @@ unsafe fn ipred_z1_neon( pub unsafe fn rav1d_intra_pred_dsp_init_16bpc(c: *mut Rav1dIntraPredDSPContext) { (*c).intra_pred[DC_PRED as usize] = Some(ipred_dc_c_erased); (*c).intra_pred[DC_128_PRED as usize] = Some(ipred_dc_128_c_erased); - (*c).intra_pred[TOP_DC_PRED as usize] = Some(ipred_dc_top_c_erased); + (*c).intra_pred[TOP_DC_PRED as usize] = Some(ipred_dc_top_c_erased::); (*c).intra_pred[LEFT_DC_PRED as usize] = Some(ipred_dc_left_c_erased); (*c).intra_pred[HOR_PRED as usize] = Some(ipred_h_c_erased); (*c).intra_pred[VERT_PRED as usize] = Some(ipred_v_c_erased); diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index a23e55dbc..3b85428a8 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -9,6 +9,7 @@ use crate::src::ipred::cfl_pred; use crate::src::ipred::dc_gen_top; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; +use crate::src::ipred::ipred_dc_top_c_erased; use crate::src::ipred::splat_dc; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; @@ -130,27 +131,6 @@ extern "C" { pub type pixel = u8; -unsafe extern "C" fn ipred_dc_top_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - _a: c_int, - _max_width: c_int, - _max_height: c_int, - _bitdepth_max: c_int, -) { - splat_dc::( - dst.cast(), - stride, - width, - height, - dc_gen_top::(topleft.cast(), width) as c_int, - BitDepth8::new(()), - ); -} - unsafe extern "C" fn ipred_cfl_top_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -1901,7 +1881,7 @@ unsafe fn ipred_z1_neon( pub unsafe fn rav1d_intra_pred_dsp_init_8bpc(c: *mut Rav1dIntraPredDSPContext) { (*c).intra_pred[DC_PRED as usize] = Some(ipred_dc_c_erased); (*c).intra_pred[DC_128_PRED as usize] = Some(ipred_dc_128_c_erased); - (*c).intra_pred[TOP_DC_PRED as usize] = Some(ipred_dc_top_c_erased); + 
(*c).intra_pred[TOP_DC_PRED as usize] = Some(ipred_dc_top_c_erased::); (*c).intra_pred[LEFT_DC_PRED as usize] = Some(ipred_dc_left_c_erased); (*c).intra_pred[HOR_PRED as usize] = Some(ipred_h_c_erased); (*c).intra_pred[VERT_PRED as usize] = Some(ipred_v_c_erased); From 1f2c31602059058dcd9b8e10a89bbd2284e1a685 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 3 Nov 2023 20:43:33 -0700 Subject: [PATCH 10/49] `fn ipred_cfl_top_c_erased`: Deduplicate w/ generics. --- src/ipred.rs | 26 ++++++++++++++++++++++++-- src/ipred_tmpl_16.rs | 26 ++------------------------ src/ipred_tmpl_8.rs | 26 ++------------------------ 3 files changed, 28 insertions(+), 50 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index b1b39287c..a9c3f4b8b 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -251,8 +251,7 @@ pub(crate) unsafe fn cfl_pred( } } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe fn dc_gen_top(topleft: *const BD::Pixel, width: c_int) -> c_uint { +unsafe fn dc_gen_top(topleft: *const BD::Pixel, width: c_int) -> c_uint { let mut dc: c_uint = (width >> 1) as c_uint; let mut i = 0; while i < width { @@ -284,6 +283,29 @@ pub(crate) unsafe extern "C" fn ipred_dc_top_c_erased( ); } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe extern "C" fn ipred_cfl_top_c_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + topleft: *const DynPixel, + width: c_int, + height: c_int, + ac: *const i16, + alpha: c_int, + bitdepth_max: c_int, +) { + cfl_pred::( + dst.cast(), + stride, + width, + height, + dc_gen_top::(topleft.cast(), width) as c_int, + ac, + alpha, + BD::from_c(bitdepth_max), + ); +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 388fde4e5..b3439c57b 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -5,9 +5,9 @@ use 
crate::include::common::bitdepth::DynPixel; use crate::include::common::intops::iclip; use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::src::ipred::cfl_pred; -use crate::src::ipred::dc_gen_top; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; +use crate::src::ipred::ipred_cfl_top_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; use crate::src::ipred::splat_dc; use crate::src::ipred::Rav1dIntraPredDSPContext; @@ -152,28 +152,6 @@ unsafe fn pixel_set(dst: *mut pixel, val: c_int, num: c_int) { } } -unsafe extern "C" fn ipred_cfl_top_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - ac: *const i16, - alpha: c_int, - bitdepth_max: c_int, -) { - cfl_pred::( - dst.cast(), - stride, - width, - height, - dc_gen_top::(topleft.cast(), width) as c_int, - ac, - alpha, - BitDepth16::from_c(bitdepth_max), - ); -} - unsafe fn dc_gen_left(topleft: *const pixel, height: c_int) -> c_uint { let mut dc: c_uint = (height >> 1) as c_uint; let mut i = 0; @@ -1969,7 +1947,7 @@ pub unsafe fn rav1d_intra_pred_dsp_init_16bpc(c: *mut Rav1dIntraPredDSPContext) (*c).cfl_pred[DC_PRED as usize] = ipred_cfl_c_erased; (*c).cfl_pred[DC_128_PRED as usize] = ipred_cfl_128_c_erased; - (*c).cfl_pred[TOP_DC_PRED as usize] = ipred_cfl_top_c_erased; + (*c).cfl_pred[TOP_DC_PRED as usize] = ipred_cfl_top_c_erased::; (*c).cfl_pred[LEFT_DC_PRED as usize] = ipred_cfl_left_c_erased; (*c).pal_pred = pal_pred_c_erased; diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 3b85428a8..b3b5eef17 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -6,9 +6,9 @@ use crate::include::common::intops::iclip; use crate::include::common::intops::iclip_u8; use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::src::ipred::cfl_pred; -use crate::src::ipred::dc_gen_top; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; +use 
crate::src::ipred::ipred_cfl_top_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; use crate::src::ipred::splat_dc; use crate::src::ipred::Rav1dIntraPredDSPContext; @@ -131,28 +131,6 @@ extern "C" { pub type pixel = u8; -unsafe extern "C" fn ipred_cfl_top_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - ac: *const i16, - alpha: c_int, - _bitdepth_max: c_int, -) { - cfl_pred::( - dst.cast(), - stride, - width, - height, - dc_gen_top::(topleft.cast(), width) as c_int, - ac, - alpha, - BitDepth8::new(()), - ); -} - unsafe fn dc_gen_left(topleft: *const pixel, height: c_int) -> c_uint { let mut dc: c_uint = (height >> 1) as c_uint; let mut i = 0; @@ -1899,7 +1877,7 @@ pub unsafe fn rav1d_intra_pred_dsp_init_8bpc(c: *mut Rav1dIntraPredDSPContext) { (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = cfl_ac_444_c_erased; (*c).cfl_pred[DC_PRED as usize] = ipred_cfl_c_erased; (*c).cfl_pred[DC_128_PRED as usize] = ipred_cfl_128_c_erased; - (*c).cfl_pred[TOP_DC_PRED as usize] = ipred_cfl_top_c_erased; + (*c).cfl_pred[TOP_DC_PRED as usize] = ipred_cfl_top_c_erased::; (*c).cfl_pred[LEFT_DC_PRED as usize] = ipred_cfl_left_c_erased; (*c).pal_pred = pal_pred_c_erased; From e0ac57f8d6dbf554fd457870772900a6975c4a08 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 3 Nov 2023 20:49:58 -0700 Subject: [PATCH 11/49] `fn dc_gen_left`: Deduplicate w/ generics. 
--- src/ipred.rs | 11 +++++++++++ src/ipred_tmpl_16.rs | 15 +++------------ src/ipred_tmpl_8.rs | 15 +++------------ 3 files changed, 17 insertions(+), 24 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index a9c3f4b8b..7e19ba9f1 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -306,6 +306,17 @@ pub(crate) unsafe extern "C" fn ipred_cfl_top_c_erased( ); } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe fn dc_gen_left(topleft: *const BD::Pixel, height: c_int) -> c_uint { + let mut dc: c_uint = (height >> 1) as c_uint; + let mut i = 0; + while i < height { + dc = dc.wrapping_add((*topleft.offset(-(1 + i) as isize)).as_::()); + i += 1; + } + return dc >> ctz(height as c_uint); +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index b3439c57b..2b4a2e33f 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -5,6 +5,7 @@ use crate::include::common::bitdepth::DynPixel; use crate::include::common::intops::iclip; use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::src::ipred::cfl_pred; +use crate::src::ipred::dc_gen_left; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; use crate::src::ipred::ipred_cfl_top_c_erased; @@ -152,16 +153,6 @@ unsafe fn pixel_set(dst: *mut pixel, val: c_int, num: c_int) { } } -unsafe fn dc_gen_left(topleft: *const pixel, height: c_int) -> c_uint { - let mut dc: c_uint = (height >> 1) as c_uint; - let mut i = 0; - while i < height { - dc = dc.wrapping_add(*topleft.offset(-(1 + i) as isize) as c_uint); - i += 1; - } - return dc >> ctz(height as c_uint); -} - unsafe extern "C" fn ipred_dc_left_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -178,7 +169,7 @@ unsafe extern "C" fn ipred_dc_left_c_erased( stride, width, height, - dc_gen_left(topleft.cast(), height) as c_int, + 
dc_gen_left::(topleft.cast(), height) as c_int, BitDepth16::from_c(bitdepth_max), ); } @@ -193,7 +184,7 @@ unsafe extern "C" fn ipred_cfl_left_c_erased( alpha: c_int, bitdepth_max: c_int, ) { - let dc: c_uint = dc_gen_left(topleft.cast(), height); + let dc: c_uint = dc_gen_left::(topleft.cast(), height); cfl_pred::( dst.cast(), stride, diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index b3b5eef17..4b223d147 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -6,6 +6,7 @@ use crate::include::common::intops::iclip; use crate::include::common::intops::iclip_u8; use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::src::ipred::cfl_pred; +use crate::src::ipred::dc_gen_left; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; use crate::src::ipred::ipred_cfl_top_c_erased; @@ -131,16 +132,6 @@ extern "C" { pub type pixel = u8; -unsafe fn dc_gen_left(topleft: *const pixel, height: c_int) -> c_uint { - let mut dc: c_uint = (height >> 1) as c_uint; - let mut i = 0; - while i < height { - dc = dc.wrapping_add(*topleft.offset(-(1 + i) as isize) as c_uint); - i += 1; - } - return dc >> ctz(height as c_uint); -} - unsafe extern "C" fn ipred_dc_left_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -157,7 +148,7 @@ unsafe extern "C" fn ipred_dc_left_c_erased( stride, width, height, - dc_gen_left(topleft.cast(), height) as c_int, + dc_gen_left::(topleft.cast(), height) as c_int, BitDepth8::new(()), ); } @@ -172,7 +163,7 @@ unsafe extern "C" fn ipred_cfl_left_c_erased( alpha: c_int, _bitdepth_max: c_int, ) { - let dc: c_uint = dc_gen_left(topleft.cast(), height); + let dc: c_uint = dc_gen_left::(topleft.cast(), height); cfl_pred::( dst.cast(), stride, From 27eeeacad4c5db8ddf8d6983664050506a95dc5f Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 3 Nov 2023 20:51:44 -0700 Subject: [PATCH 12/49] `fn ipred_dc_left_c_erased`: Deduplicate w/ generics. 
--- src/ipred.rs | 22 ++++++++++++++++++++++ src/ipred_tmpl_16.rs | 24 ++---------------------- src/ipred_tmpl_8.rs | 24 ++---------------------- 3 files changed, 26 insertions(+), 44 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 7e19ba9f1..b2a8a4e79 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -317,6 +317,28 @@ pub(crate) unsafe fn dc_gen_left(topleft: *const BD::Pixel, height return dc >> ctz(height as c_uint); } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe extern "C" fn ipred_dc_left_c_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + topleft: *const DynPixel, + width: c_int, + height: c_int, + _a: c_int, + _max_width: c_int, + _max_height: c_int, + bitdepth_max: c_int, +) { + splat_dc::( + dst.cast(), + stride, + width, + height, + dc_gen_left::(topleft.cast(), height) as c_int, + BD::from_c(bitdepth_max), + ); +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 2b4a2e33f..87d6b386c 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -9,6 +9,7 @@ use crate::src::ipred::dc_gen_left; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; use crate::src::ipred::ipred_cfl_top_c_erased; +use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; use crate::src::ipred::splat_dc; use crate::src::ipred::Rav1dIntraPredDSPContext; @@ -153,27 +154,6 @@ unsafe fn pixel_set(dst: *mut pixel, val: c_int, num: c_int) { } } -unsafe extern "C" fn ipred_dc_left_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - _a: c_int, - _max_width: c_int, - _max_height: c_int, - bitdepth_max: c_int, -) { - splat_dc::( - dst.cast(), - stride, - width, - height, - dc_gen_left::(topleft.cast(), height) as c_int, - 
BitDepth16::from_c(bitdepth_max), - ); -} - unsafe extern "C" fn ipred_cfl_left_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -1920,7 +1900,7 @@ pub unsafe fn rav1d_intra_pred_dsp_init_16bpc(c: *mut Rav1dIntraPredDSPContext) (*c).intra_pred[DC_PRED as usize] = Some(ipred_dc_c_erased); (*c).intra_pred[DC_128_PRED as usize] = Some(ipred_dc_128_c_erased); (*c).intra_pred[TOP_DC_PRED as usize] = Some(ipred_dc_top_c_erased::); - (*c).intra_pred[LEFT_DC_PRED as usize] = Some(ipred_dc_left_c_erased); + (*c).intra_pred[LEFT_DC_PRED as usize] = Some(ipred_dc_left_c_erased::); (*c).intra_pred[HOR_PRED as usize] = Some(ipred_h_c_erased); (*c).intra_pred[VERT_PRED as usize] = Some(ipred_v_c_erased); (*c).intra_pred[PAETH_PRED as usize] = Some(ipred_paeth_c_erased); diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 4b223d147..c7e44d8cf 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -10,6 +10,7 @@ use crate::src::ipred::dc_gen_left; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; use crate::src::ipred::ipred_cfl_top_c_erased; +use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; use crate::src::ipred::splat_dc; use crate::src::ipred::Rav1dIntraPredDSPContext; @@ -132,27 +133,6 @@ extern "C" { pub type pixel = u8; -unsafe extern "C" fn ipred_dc_left_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - _a: c_int, - _max_width: c_int, - _max_height: c_int, - _bitdepth_max: c_int, -) { - splat_dc::( - dst.cast(), - stride, - width, - height, - dc_gen_left::(topleft.cast(), height) as c_int, - BitDepth8::new(()), - ); -} - unsafe extern "C" fn ipred_cfl_left_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -1851,7 +1831,7 @@ pub unsafe fn rav1d_intra_pred_dsp_init_8bpc(c: *mut Rav1dIntraPredDSPContext) { (*c).intra_pred[DC_PRED as usize] = Some(ipred_dc_c_erased); (*c).intra_pred[DC_128_PRED as usize] = 
Some(ipred_dc_128_c_erased); (*c).intra_pred[TOP_DC_PRED as usize] = Some(ipred_dc_top_c_erased::); - (*c).intra_pred[LEFT_DC_PRED as usize] = Some(ipred_dc_left_c_erased); + (*c).intra_pred[LEFT_DC_PRED as usize] = Some(ipred_dc_left_c_erased::); (*c).intra_pred[HOR_PRED as usize] = Some(ipred_h_c_erased); (*c).intra_pred[VERT_PRED as usize] = Some(ipred_v_c_erased); (*c).intra_pred[PAETH_PRED as usize] = Some(ipred_paeth_c_erased); From 3b017cda1103f3b88cab00b3af3cf25c15bbb65d Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 3 Nov 2023 20:53:47 -0700 Subject: [PATCH 13/49] `fn ipred_cfl_left_c_erased`: Deduplicate w/ generics. --- src/ipred.rs | 27 +++++++++++++++++++++++++-- src/ipred_tmpl_16.rs | 27 ++------------------------- src/ipred_tmpl_8.rs | 27 ++------------------------- 3 files changed, 29 insertions(+), 52 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index b2a8a4e79..bcf7e91dc 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -306,8 +306,7 @@ pub(crate) unsafe extern "C" fn ipred_cfl_top_c_erased( ); } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe fn dc_gen_left(topleft: *const BD::Pixel, height: c_int) -> c_uint { +unsafe fn dc_gen_left(topleft: *const BD::Pixel, height: c_int) -> c_uint { let mut dc: c_uint = (height >> 1) as c_uint; let mut i = 0; while i < height { @@ -339,6 +338,30 @@ pub(crate) unsafe extern "C" fn ipred_dc_left_c_erased( ); } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe extern "C" fn ipred_cfl_left_c_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + topleft: *const DynPixel, + width: c_int, + height: c_int, + ac: *const i16, + alpha: c_int, + bitdepth_max: c_int, +) { + let dc: c_uint = dc_gen_left::(topleft.cast(), height); + cfl_pred::( + dst.cast(), + stride, + width, + height, + dc as c_int, + ac, + alpha, + BD::from_c(bitdepth_max), + ); +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) unsafe fn 
get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 87d6b386c..0d6660cc4 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -5,9 +5,9 @@ use crate::include::common::bitdepth::DynPixel; use crate::include::common::intops::iclip; use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::src::ipred::cfl_pred; -use crate::src::ipred::dc_gen_left; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; +use crate::src::ipred::ipred_cfl_left_c_erased; use crate::src::ipred::ipred_cfl_top_c_erased; use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; @@ -154,29 +154,6 @@ unsafe fn pixel_set(dst: *mut pixel, val: c_int, num: c_int) { } } -unsafe extern "C" fn ipred_cfl_left_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - ac: *const i16, - alpha: c_int, - bitdepth_max: c_int, -) { - let dc: c_uint = dc_gen_left::(topleft.cast(), height); - cfl_pred::( - dst.cast(), - stride, - width, - height, - dc as c_int, - ac, - alpha, - BitDepth16::from_c(bitdepth_max), - ); -} - unsafe fn dc_gen(topleft: *const pixel, width: c_int, height: c_int) -> c_uint { let mut dc: c_uint = (width + height >> 1) as c_uint; let mut i = 0; @@ -1919,7 +1896,7 @@ pub unsafe fn rav1d_intra_pred_dsp_init_16bpc(c: *mut Rav1dIntraPredDSPContext) (*c).cfl_pred[DC_128_PRED as usize] = ipred_cfl_128_c_erased; (*c).cfl_pred[TOP_DC_PRED as usize] = ipred_cfl_top_c_erased::; - (*c).cfl_pred[LEFT_DC_PRED as usize] = ipred_cfl_left_c_erased; + (*c).cfl_pred[LEFT_DC_PRED as usize] = ipred_cfl_left_c_erased::; (*c).pal_pred = pal_pred_c_erased; diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index c7e44d8cf..1dacef7e7 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -6,9 +6,9 @@ use crate::include::common::intops::iclip; use 
crate::include::common::intops::iclip_u8; use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::src::ipred::cfl_pred; -use crate::src::ipred::dc_gen_left; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; +use crate::src::ipred::ipred_cfl_left_c_erased; use crate::src::ipred::ipred_cfl_top_c_erased; use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; @@ -133,29 +133,6 @@ extern "C" { pub type pixel = u8; -unsafe extern "C" fn ipred_cfl_left_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - ac: *const i16, - alpha: c_int, - _bitdepth_max: c_int, -) { - let dc: c_uint = dc_gen_left::(topleft.cast(), height); - cfl_pred::( - dst.cast(), - stride, - width, - height, - dc as c_int, - ac, - alpha, - BitDepth8::new(()), - ); -} - unsafe fn dc_gen(topleft: *const pixel, width: c_int, height: c_int) -> c_uint { let mut dc: c_uint = (width + height >> 1) as c_uint; let mut i = 0; @@ -1849,7 +1826,7 @@ pub unsafe fn rav1d_intra_pred_dsp_init_8bpc(c: *mut Rav1dIntraPredDSPContext) { (*c).cfl_pred[DC_PRED as usize] = ipred_cfl_c_erased; (*c).cfl_pred[DC_128_PRED as usize] = ipred_cfl_128_c_erased; (*c).cfl_pred[TOP_DC_PRED as usize] = ipred_cfl_top_c_erased::; - (*c).cfl_pred[LEFT_DC_PRED as usize] = ipred_cfl_left_c_erased; + (*c).cfl_pred[LEFT_DC_PRED as usize] = ipred_cfl_left_c_erased::; (*c).pal_pred = pal_pred_c_erased; From bd1ba85d7df2539b8fbab525981a7d78f73c3a9f Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 3 Nov 2023 21:00:47 -0700 Subject: [PATCH 14/49] `fn dc_gen`: Deduplicate w/ generics. 
--- src/ipred.rs | 34 ++++++++++++++++++++++++++++++++++ src/ipred_tmpl_16.rs | 31 +++---------------------------- src/ipred_tmpl_8.rs | 31 +++---------------------------- 3 files changed, 40 insertions(+), 56 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index bcf7e91dc..48b1dcab9 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -362,6 +362,40 @@ pub(crate) unsafe extern "C" fn ipred_cfl_left_c_erased( ); } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe fn dc_gen( + topleft: *const BD::Pixel, + width: c_int, + height: c_int, +) -> c_uint { + let (multiplier_1x2, multiplier_1x4, base_shift) = match BD::BPC { + BPC::BPC8 => (0x5556, 0x3334, 16), + BPC::BPC16 => (0xAAAB, 0x6667, 17), + }; + + let mut dc: c_uint = (width + height >> 1) as c_uint; + let mut i = 0; + while i < width { + dc = dc.wrapping_add((*topleft.offset((i + 1) as isize)).as_::()); + i += 1; + } + let mut i_0 = 0; + while i_0 < height { + dc = dc.wrapping_add((*topleft.offset(-(i_0 + 1) as isize)).as_::()); + i_0 += 1; + } + dc >>= ctz((width + height) as c_uint); + if width != height { + dc = dc.wrapping_mul(if width > height * 2 || height > width * 2 { + multiplier_1x4 + } else { + multiplier_1x2 + }); + dc >>= base_shift; + } + return dc; +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 0d6660cc4..bb1109692 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -5,6 +5,7 @@ use crate::include::common::bitdepth::DynPixel; use crate::include::common::intops::iclip; use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::src::ipred::cfl_pred; +use crate::src::ipred::dc_gen; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; use crate::src::ipred::ipred_cfl_left_c_erased; @@ -154,32 +155,6 @@ unsafe fn pixel_set(dst: *mut pixel, val: 
c_int, num: c_int) { } } -unsafe fn dc_gen(topleft: *const pixel, width: c_int, height: c_int) -> c_uint { - let mut dc: c_uint = (width + height >> 1) as c_uint; - let mut i = 0; - while i < width { - dc = dc.wrapping_add(*topleft.offset((i + 1) as isize) as c_uint); - i += 1; - } - let mut i_0 = 0; - while i_0 < height { - dc = dc.wrapping_add(*topleft.offset(-(i_0 + 1) as isize) as c_uint); - i_0 += 1; - } - dc >>= ctz((width + height) as c_uint); - if width != height { - dc = dc.wrapping_mul( - (if width > height * 2 || height > width * 2 { - 0x6667 as c_int - } else { - 0xaaab as c_int - }) as c_uint, - ); - dc >>= 17; - } - return dc; -} - unsafe extern "C" fn ipred_dc_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -196,7 +171,7 @@ unsafe extern "C" fn ipred_dc_c_erased( stride, width, height, - dc_gen(topleft.cast(), width, height) as c_int, + dc_gen::(topleft.cast(), width, height) as c_int, BitDepth16::from_c(bitdepth_max), ); } @@ -211,7 +186,7 @@ unsafe extern "C" fn ipred_cfl_c_erased( alpha: c_int, bitdepth_max: c_int, ) { - let dc: c_uint = dc_gen(topleft.cast(), width, height); + let dc: c_uint = dc_gen::(topleft.cast(), width, height); cfl_pred::( dst.cast(), stride, diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 1dacef7e7..e97a15bc6 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -6,6 +6,7 @@ use crate::include::common::intops::iclip; use crate::include::common::intops::iclip_u8; use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::src::ipred::cfl_pred; +use crate::src::ipred::dc_gen; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; use crate::src::ipred::ipred_cfl_left_c_erased; @@ -133,32 +134,6 @@ extern "C" { pub type pixel = u8; -unsafe fn dc_gen(topleft: *const pixel, width: c_int, height: c_int) -> c_uint { - let mut dc: c_uint = (width + height >> 1) as c_uint; - let mut i = 0; - while i < width { - dc = dc.wrapping_add(*topleft.offset((i + 1) as isize) as c_uint); 
- i += 1; - } - let mut i_0 = 0; - while i_0 < height { - dc = dc.wrapping_add(*topleft.offset(-(i_0 + 1) as isize) as c_uint); - i_0 += 1; - } - dc >>= ctz((width + height) as c_uint); - if width != height { - dc = dc.wrapping_mul( - (if width > height * 2 || height > width * 2 { - 0x3334 as c_int - } else { - 0x5556 as c_int - }) as c_uint, - ); - dc >>= 16; - } - return dc; -} - unsafe extern "C" fn ipred_dc_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -175,7 +150,7 @@ unsafe extern "C" fn ipred_dc_c_erased( stride, width, height, - dc_gen(topleft.cast(), width, height) as c_int, + dc_gen::(topleft.cast(), width, height) as c_int, BitDepth8::new(()), ); } @@ -190,7 +165,7 @@ unsafe extern "C" fn ipred_cfl_c_erased( alpha: c_int, _bitdepth_max: c_int, ) { - let dc: c_uint = dc_gen(topleft.cast(), width, height); + let dc: c_uint = dc_gen::(topleft.cast(), width, height); cfl_pred::( dst.cast(), stride, From 683986b70ec7a95c794c7cf3ea3080960bff8064 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 3 Nov 2023 21:03:21 -0700 Subject: [PATCH 15/49] `fn ipred_dc_c_erased`: Deduplicate w/ generics. 
--- src/ipred.rs | 22 ++++++++++++++++++++++ src/ipred_tmpl_16.rs | 24 ++---------------------- src/ipred_tmpl_8.rs | 24 ++---------------------- 3 files changed, 26 insertions(+), 44 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 48b1dcab9..f5453a874 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -396,6 +396,28 @@ pub(crate) unsafe fn dc_gen( return dc; } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe extern "C" fn ipred_dc_c_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + topleft: *const DynPixel, + width: c_int, + height: c_int, + _a: c_int, + _max_width: c_int, + _max_height: c_int, + bitdepth_max: c_int, +) { + splat_dc::( + dst.cast(), + stride, + width, + height, + dc_gen::(topleft.cast(), width, height) as c_int, + BD::from_c(bitdepth_max), + ); +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index bb1109692..625247e26 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -10,6 +10,7 @@ use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; use crate::src::ipred::ipred_cfl_left_c_erased; use crate::src::ipred::ipred_cfl_top_c_erased; +use crate::src::ipred::ipred_dc_c_erased; use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; use crate::src::ipred::splat_dc; @@ -155,27 +156,6 @@ unsafe fn pixel_set(dst: *mut pixel, val: c_int, num: c_int) { } } -unsafe extern "C" fn ipred_dc_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - _a: c_int, - _max_width: c_int, - _max_height: c_int, - bitdepth_max: c_int, -) { - splat_dc::( - dst.cast(), - stride, - width, - height, - dc_gen::(topleft.cast(), width, height) as c_int, - BitDepth16::from_c(bitdepth_max), - ); -} - unsafe extern "C" fn 
ipred_cfl_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -1849,7 +1829,7 @@ unsafe fn ipred_z1_neon( #[cold] pub unsafe fn rav1d_intra_pred_dsp_init_16bpc(c: *mut Rav1dIntraPredDSPContext) { - (*c).intra_pred[DC_PRED as usize] = Some(ipred_dc_c_erased); + (*c).intra_pred[DC_PRED as usize] = Some(ipred_dc_c_erased::); (*c).intra_pred[DC_128_PRED as usize] = Some(ipred_dc_128_c_erased); (*c).intra_pred[TOP_DC_PRED as usize] = Some(ipred_dc_top_c_erased::); (*c).intra_pred[LEFT_DC_PRED as usize] = Some(ipred_dc_left_c_erased::); diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index e97a15bc6..45333034a 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -11,6 +11,7 @@ use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; use crate::src::ipred::ipred_cfl_left_c_erased; use crate::src::ipred::ipred_cfl_top_c_erased; +use crate::src::ipred::ipred_dc_c_erased; use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; use crate::src::ipred::splat_dc; @@ -134,27 +135,6 @@ extern "C" { pub type pixel = u8; -unsafe extern "C" fn ipred_dc_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - _a: c_int, - _max_width: c_int, - _max_height: c_int, - _bitdepth_max: c_int, -) { - splat_dc::( - dst.cast(), - stride, - width, - height, - dc_gen::(topleft.cast(), width, height) as c_int, - BitDepth8::new(()), - ); -} - unsafe extern "C" fn ipred_cfl_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -1780,7 +1760,7 @@ unsafe fn ipred_z1_neon( #[cold] pub unsafe fn rav1d_intra_pred_dsp_init_8bpc(c: *mut Rav1dIntraPredDSPContext) { - (*c).intra_pred[DC_PRED as usize] = Some(ipred_dc_c_erased); + (*c).intra_pred[DC_PRED as usize] = Some(ipred_dc_c_erased::); (*c).intra_pred[DC_128_PRED as usize] = Some(ipred_dc_128_c_erased); (*c).intra_pred[TOP_DC_PRED as usize] = Some(ipred_dc_top_c_erased::); (*c).intra_pred[LEFT_DC_PRED as usize] = 
Some(ipred_dc_left_c_erased::); From 759203ced306a9d60e7898e93e877f27f6f88b31 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 3 Nov 2023 21:07:09 -0700 Subject: [PATCH 16/49] `fn ipred_cfl_c_erased`: Deduplicate w/ generics. --- src/ipred.rs | 31 +++++++++++++++++++++++++------ src/ipred_tmpl_16.rs | 27 ++------------------------- src/ipred_tmpl_8.rs | 27 ++------------------------- 3 files changed, 29 insertions(+), 56 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index f5453a874..c63649fa0 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -362,12 +362,7 @@ pub(crate) unsafe extern "C" fn ipred_cfl_left_c_erased( ); } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe fn dc_gen( - topleft: *const BD::Pixel, - width: c_int, - height: c_int, -) -> c_uint { +unsafe fn dc_gen(topleft: *const BD::Pixel, width: c_int, height: c_int) -> c_uint { let (multiplier_1x2, multiplier_1x4, base_shift) = match BD::BPC { BPC::BPC8 => (0x5556, 0x3334, 16), BPC::BPC16 => (0xAAAB, 0x6667, 17), @@ -418,6 +413,30 @@ pub(crate) unsafe extern "C" fn ipred_dc_c_erased( ); } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe extern "C" fn ipred_cfl_c_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + topleft: *const DynPixel, + width: c_int, + height: c_int, + ac: *const i16, + alpha: c_int, + bitdepth_max: c_int, +) { + let dc: c_uint = dc_gen::(topleft.cast(), width, height); + cfl_pred::( + dst.cast(), + stride, + width, + height, + dc as c_int, + ac, + alpha, + BD::from_c(bitdepth_max), + ); +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 625247e26..678d901fb 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -5,9 +5,9 @@ use crate::include::common::bitdepth::DynPixel; use crate::include::common::intops::iclip; use 
crate::include::dav1d::headers::Rav1dPixelLayout; use crate::src::ipred::cfl_pred; -use crate::src::ipred::dc_gen; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; +use crate::src::ipred::ipred_cfl_c_erased; use crate::src::ipred::ipred_cfl_left_c_erased; use crate::src::ipred::ipred_cfl_top_c_erased; use crate::src::ipred::ipred_dc_c_erased; @@ -156,29 +156,6 @@ unsafe fn pixel_set(dst: *mut pixel, val: c_int, num: c_int) { } } -unsafe extern "C" fn ipred_cfl_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - ac: *const i16, - alpha: c_int, - bitdepth_max: c_int, -) { - let dc: c_uint = dc_gen::(topleft.cast(), width, height); - cfl_pred::( - dst.cast(), - stride, - width, - height, - dc as c_int, - ac, - alpha, - BitDepth16::from_c(bitdepth_max), - ); -} - unsafe extern "C" fn ipred_dc_128_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -1847,7 +1824,7 @@ pub unsafe fn rav1d_intra_pred_dsp_init_16bpc(c: *mut Rav1dIntraPredDSPContext) (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = cfl_ac_420_c_erased; (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = cfl_ac_422_c_erased; (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = cfl_ac_444_c_erased; - (*c).cfl_pred[DC_PRED as usize] = ipred_cfl_c_erased; + (*c).cfl_pred[DC_PRED as usize] = ipred_cfl_c_erased::; (*c).cfl_pred[DC_128_PRED as usize] = ipred_cfl_128_c_erased; (*c).cfl_pred[TOP_DC_PRED as usize] = ipred_cfl_top_c_erased::; diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 45333034a..33821cbae 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -6,9 +6,9 @@ use crate::include::common::intops::iclip; use crate::include::common::intops::iclip_u8; use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::src::ipred::cfl_pred; -use crate::src::ipred::dc_gen; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; +use crate::src::ipred::ipred_cfl_c_erased; 
use crate::src::ipred::ipred_cfl_left_c_erased; use crate::src::ipred::ipred_cfl_top_c_erased; use crate::src::ipred::ipred_dc_c_erased; @@ -135,29 +135,6 @@ extern "C" { pub type pixel = u8; -unsafe extern "C" fn ipred_cfl_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - ac: *const i16, - alpha: c_int, - _bitdepth_max: c_int, -) { - let dc: c_uint = dc_gen::(topleft.cast(), width, height); - cfl_pred::( - dst.cast(), - stride, - width, - height, - dc as c_int, - ac, - alpha, - BitDepth8::new(()), - ); -} - unsafe extern "C" fn ipred_dc_128_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -1778,7 +1755,7 @@ pub unsafe fn rav1d_intra_pred_dsp_init_8bpc(c: *mut Rav1dIntraPredDSPContext) { (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = cfl_ac_420_c_erased; (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = cfl_ac_422_c_erased; (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = cfl_ac_444_c_erased; - (*c).cfl_pred[DC_PRED as usize] = ipred_cfl_c_erased; + (*c).cfl_pred[DC_PRED as usize] = ipred_cfl_c_erased::; (*c).cfl_pred[DC_128_PRED as usize] = ipred_cfl_128_c_erased; (*c).cfl_pred[TOP_DC_PRED as usize] = ipred_cfl_top_c_erased::; (*c).cfl_pred[LEFT_DC_PRED as usize] = ipred_cfl_left_c_erased::; From 80777ef3349f5344b69c4f9b24c82b7e88714664 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 3 Nov 2023 21:11:00 -0700 Subject: [PATCH 17/49] `fn ipred_dc_128_c_erased`: Deduplicate w/ generics. 
--- src/ipred.rs | 20 ++++++++++++++++++-- src/ipred_tmpl_16.rs | 26 ++------------------------ src/ipred_tmpl_8.rs | 19 ++----------------- 3 files changed, 22 insertions(+), 43 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index c63649fa0..6f7280971 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -160,9 +160,8 @@ extern "C" { decl_fns!(pal_pred, pal_pred); } -// TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] -pub(crate) unsafe fn splat_dc( +unsafe fn splat_dc( mut dst: *mut BD::Pixel, stride: ptrdiff_t, width: c_int, @@ -437,6 +436,23 @@ pub(crate) unsafe extern "C" fn ipred_cfl_c_erased( ); } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe extern "C" fn ipred_dc_128_c_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + _topleft: *const DynPixel, + width: c_int, + height: c_int, + _a: c_int, + _max_width: c_int, + _max_height: c_int, + bitdepth_max: c_int, +) { + let bd = BD::from_c(bitdepth_max); + let dc = bd.bitdepth_max().as_::() + 1 >> 1; + splat_dc::(dst.cast(), stride, width, height, dc, bd); +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 678d901fb..9b97c66ab 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -10,10 +10,10 @@ use crate::src::ipred::get_upsample; use crate::src::ipred::ipred_cfl_c_erased; use crate::src::ipred::ipred_cfl_left_c_erased; use crate::src::ipred::ipred_cfl_top_c_erased; +use crate::src::ipred::ipred_dc_128_c_erased; use crate::src::ipred::ipred_dc_c_erased; use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; -use crate::src::ipred::splat_dc; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; use crate::src::levels::DC_PRED; @@ -156,28 +156,6 @@ unsafe fn pixel_set(dst: *mut pixel, val: c_int, 
num: c_int) { } } -unsafe extern "C" fn ipred_dc_128_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - _topleft: *const DynPixel, - width: c_int, - height: c_int, - _a: c_int, - _max_width: c_int, - _max_height: c_int, - bitdepth_max: c_int, -) { - let dc = bitdepth_max + 1 >> 1; - splat_dc::( - dst.cast(), - stride, - width, - height, - dc, - BitDepth16::from_c(bitdepth_max), - ); -} - unsafe extern "C" fn ipred_cfl_128_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -1807,7 +1785,7 @@ unsafe fn ipred_z1_neon( #[cold] pub unsafe fn rav1d_intra_pred_dsp_init_16bpc(c: *mut Rav1dIntraPredDSPContext) { (*c).intra_pred[DC_PRED as usize] = Some(ipred_dc_c_erased::); - (*c).intra_pred[DC_128_PRED as usize] = Some(ipred_dc_128_c_erased); + (*c).intra_pred[DC_128_PRED as usize] = Some(ipred_dc_128_c_erased::); (*c).intra_pred[TOP_DC_PRED as usize] = Some(ipred_dc_top_c_erased::); (*c).intra_pred[LEFT_DC_PRED as usize] = Some(ipred_dc_left_c_erased::); (*c).intra_pred[HOR_PRED as usize] = Some(ipred_h_c_erased); diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 33821cbae..0c2d32df8 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -11,10 +11,10 @@ use crate::src::ipred::get_upsample; use crate::src::ipred::ipred_cfl_c_erased; use crate::src::ipred::ipred_cfl_left_c_erased; use crate::src::ipred::ipred_cfl_top_c_erased; +use crate::src::ipred::ipred_dc_128_c_erased; use crate::src::ipred::ipred_dc_c_erased; use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; -use crate::src::ipred::splat_dc; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; use crate::src::levels::DC_PRED; @@ -135,21 +135,6 @@ extern "C" { pub type pixel = u8; -unsafe extern "C" fn ipred_dc_128_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - _topleft: *const DynPixel, - width: c_int, - height: c_int, - _a: c_int, - _max_width: c_int, - _max_height: c_int, - _bitdepth_max: c_int, -) { - let dc = 
128; - splat_dc::(dst.cast(), stride, width, height, dc, BitDepth8::new(())); -} - unsafe extern "C" fn ipred_cfl_128_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -1738,7 +1723,7 @@ unsafe fn ipred_z1_neon( #[cold] pub unsafe fn rav1d_intra_pred_dsp_init_8bpc(c: *mut Rav1dIntraPredDSPContext) { (*c).intra_pred[DC_PRED as usize] = Some(ipred_dc_c_erased::); - (*c).intra_pred[DC_128_PRED as usize] = Some(ipred_dc_128_c_erased); + (*c).intra_pred[DC_128_PRED as usize] = Some(ipred_dc_128_c_erased::); (*c).intra_pred[TOP_DC_PRED as usize] = Some(ipred_dc_top_c_erased::); (*c).intra_pred[LEFT_DC_PRED as usize] = Some(ipred_dc_left_c_erased::); (*c).intra_pred[HOR_PRED as usize] = Some(ipred_h_c_erased); From fe30d1dd08410f02ca0a112b477fc43c44ec6952 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 3 Nov 2023 21:19:22 -0700 Subject: [PATCH 18/49] `fn ipred_cfl_128_c_erased`: Deduplicate w/ generics. --- src/ipred.rs | 19 +++++++++++++++++-- src/ipred_tmpl_16.rs | 31 +++++-------------------------- src/ipred_tmpl_8.rs | 31 +++++-------------------------- 3 files changed, 27 insertions(+), 54 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 6f7280971..4e3281309 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -224,9 +224,8 @@ unsafe fn splat_dc( } } -// TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] -pub(crate) unsafe fn cfl_pred( +unsafe fn cfl_pred( mut dst: *mut BD::Pixel, stride: ptrdiff_t, width: c_int, @@ -453,6 +452,22 @@ pub(crate) unsafe extern "C" fn ipred_dc_128_c_erased( splat_dc::(dst.cast(), stride, width, height, dc, bd); } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe extern "C" fn ipred_cfl_128_c_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + _topleft: *const DynPixel, + width: c_int, + height: c_int, + ac: *const i16, + alpha: c_int, + bitdepth_max: c_int, +) { + let bd = BD::from_c(bitdepth_max); + let dc = bd.bitdepth_max().as_::() + 1 >> 1; + cfl_pred::(dst.cast(), 
stride, width, height, dc, ac, alpha, bd); +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 9b97c66ab..5e03aca5c 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -1,12 +1,11 @@ use crate::include::common::attributes::ctz; -use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::BitDepth16; use crate::include::common::bitdepth::DynPixel; use crate::include::common::intops::iclip; use crate::include::dav1d::headers::Rav1dPixelLayout; -use crate::src::ipred::cfl_pred; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; +use crate::src::ipred::ipred_cfl_128_c_erased; use crate::src::ipred::ipred_cfl_c_erased; use crate::src::ipred::ipred_cfl_left_c_erased; use crate::src::ipred::ipred_cfl_top_c_erased; @@ -45,6 +44,9 @@ use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; #[cfg(feature = "asm")] use cfg_if::cfg_if; +#[cfg(all(feature = "asm", target_arch = "aarch64"))] +use crate::include::common::bitdepth::BitDepth; + #[cfg(all(feature = "asm", target_arch = "aarch64"))] extern "C" { fn dav1d_ipred_z1_fill2_16bpc_neon( @@ -156,29 +158,6 @@ unsafe fn pixel_set(dst: *mut pixel, val: c_int, num: c_int) { } } -unsafe extern "C" fn ipred_cfl_128_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - _topleft: *const DynPixel, - width: c_int, - height: c_int, - ac: *const i16, - alpha: c_int, - bitdepth_max: c_int, -) { - let dc = bitdepth_max + 1 >> 1; - cfl_pred::( - dst.cast(), - stride, - width, - height, - dc, - ac, - alpha, - BitDepth16::from_c(bitdepth_max), - ); -} - unsafe extern "C" fn ipred_v_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -1804,7 +1783,7 @@ pub unsafe fn rav1d_intra_pred_dsp_init_16bpc(c: *mut Rav1dIntraPredDSPContext) (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = 
cfl_ac_444_c_erased; (*c).cfl_pred[DC_PRED as usize] = ipred_cfl_c_erased::; - (*c).cfl_pred[DC_128_PRED as usize] = ipred_cfl_128_c_erased; + (*c).cfl_pred[DC_128_PRED as usize] = ipred_cfl_128_c_erased::; (*c).cfl_pred[TOP_DC_PRED as usize] = ipred_cfl_top_c_erased::; (*c).cfl_pred[LEFT_DC_PRED as usize] = ipred_cfl_left_c_erased::; diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 0c2d32df8..13145d57b 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -1,13 +1,12 @@ use crate::include::common::attributes::ctz; -use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::BitDepth8; use crate::include::common::bitdepth::DynPixel; use crate::include::common::intops::iclip; use crate::include::common::intops::iclip_u8; use crate::include::dav1d::headers::Rav1dPixelLayout; -use crate::src::ipred::cfl_pred; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; +use crate::src::ipred::ipred_cfl_128_c_erased; use crate::src::ipred::ipred_cfl_c_erased; use crate::src::ipred::ipred_cfl_left_c_erased; use crate::src::ipred::ipred_cfl_top_c_erased; @@ -47,6 +46,9 @@ use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; #[cfg(feature = "asm")] use cfg_if::cfg_if; +#[cfg(all(feature = "asm", target_arch = "aarch64"))] +use crate::include::common::bitdepth::BitDepth; + #[cfg(all(feature = "asm", target_arch = "aarch64"))] extern "C" { fn dav1d_ipred_z1_fill2_8bpc_neon( @@ -135,29 +137,6 @@ extern "C" { pub type pixel = u8; -unsafe extern "C" fn ipred_cfl_128_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - _topleft: *const DynPixel, - width: c_int, - height: c_int, - ac: *const i16, - alpha: c_int, - _bitdepth_max: c_int, -) { - let dc = 128; - cfl_pred::( - dst.cast(), - stride, - width, - height, - dc, - ac, - alpha, - BitDepth8::new(()), - ); -} - unsafe extern "C" fn ipred_v_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -1741,7 +1720,7 @@ pub unsafe fn rav1d_intra_pred_dsp_init_8bpc(c: 
*mut Rav1dIntraPredDSPContext) { (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = cfl_ac_422_c_erased; (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = cfl_ac_444_c_erased; (*c).cfl_pred[DC_PRED as usize] = ipred_cfl_c_erased::; - (*c).cfl_pred[DC_128_PRED as usize] = ipred_cfl_128_c_erased; + (*c).cfl_pred[DC_128_PRED as usize] = ipred_cfl_128_c_erased::; (*c).cfl_pred[TOP_DC_PRED as usize] = ipred_cfl_top_c_erased::; (*c).cfl_pred[LEFT_DC_PRED as usize] = ipred_cfl_left_c_erased::; From d26f0e3dcd0443434df6bd2dc8846f2f9128dd1b Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 3 Nov 2023 21:26:16 -0700 Subject: [PATCH 19/49] `fn ipred_v_rust`: Deduplicate w/ generics. --- src/ipred.rs | 27 +++++++++++++++++++++++++++ src/ipred_tmpl_16.rs | 32 ++++---------------------------- src/ipred_tmpl_8.rs | 30 ++++-------------------------- 3 files changed, 35 insertions(+), 54 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 4e3281309..f2a0684dd 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -10,6 +10,7 @@ use std::ffi::c_int; use std::ffi::c_uint; use std::ffi::c_ulong; use std::ffi::c_ulonglong; +use std::slice; pub type angular_ipred_fn = unsafe extern "C" fn( *mut DynPixel, @@ -468,6 +469,32 @@ pub(crate) unsafe extern "C" fn ipred_cfl_128_c_erased( cfl_pred::(dst.cast(), stride, width, height, dc, ac, alpha, bd); } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe fn ipred_v_rust( + mut dst: *mut BD::Pixel, + stride: ptrdiff_t, + topleft: *const BD::Pixel, + width: c_int, + height: c_int, + _a: c_int, + _max_width: c_int, + _max_height: c_int, + _bd: BD, +) { + let width = width.try_into().unwrap(); + + let mut y = 0; + while y < height { + BD::pixel_copy( + slice::from_raw_parts_mut(dst, width), + &slice::from_raw_parts(topleft, width + 1)[1..], + width, + ); + dst = dst.offset(BD::pxstride(stride as usize) as isize); + y += 1; + } +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) 
unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 5e03aca5c..9f407c23d 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -1,4 +1,5 @@ use crate::include::common::attributes::ctz; +use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::BitDepth16; use crate::include::common::bitdepth::DynPixel; use crate::include::common::intops::iclip; @@ -13,6 +14,7 @@ use crate::src::ipred::ipred_dc_128_c_erased; use crate::src::ipred::ipred_dc_c_erased; use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; +use crate::src::ipred::ipred_v_rust; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; use crate::src::levels::DC_PRED; @@ -44,9 +46,6 @@ use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; #[cfg(feature = "asm")] use cfg_if::cfg_if; -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -use crate::include::common::bitdepth::BitDepth; - #[cfg(all(feature = "asm", target_arch = "aarch64"))] extern "C" { fn dav1d_ipred_z1_fill2_16bpc_neon( @@ -169,7 +168,7 @@ unsafe extern "C" fn ipred_v_c_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_v_rust( + ipred_v_rust::( dst.cast(), stride, topleft.cast(), @@ -178,33 +177,10 @@ unsafe extern "C" fn ipred_v_c_erased( a, max_width, max_height, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); } -unsafe fn ipred_v_rust( - mut dst: *mut pixel, - stride: ptrdiff_t, - topleft: *const pixel, - width: c_int, - height: c_int, - _a: c_int, - _max_width: c_int, - _max_height: c_int, - _bitdepth_max: c_int, -) { - let mut y = 0; - while y < height { - memcpy( - dst as *mut c_void, - topleft.offset(1) as *const c_void, - (width << 1) as usize, - ); - dst = dst.offset(PXSTRIDE(stride) as isize); - y += 1; - } -} - unsafe extern "C" fn ipred_h_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, diff --git 
a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 13145d57b..8c1703412 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -1,4 +1,5 @@ use crate::include::common::attributes::ctz; +use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::BitDepth8; use crate::include::common::bitdepth::DynPixel; use crate::include::common::intops::iclip; @@ -14,6 +15,7 @@ use crate::src::ipred::ipred_dc_128_c_erased; use crate::src::ipred::ipred_dc_c_erased; use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; +use crate::src::ipred::ipred_v_rust; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; use crate::src::levels::DC_PRED; @@ -46,9 +48,6 @@ use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; #[cfg(feature = "asm")] use cfg_if::cfg_if; -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -use crate::include::common::bitdepth::BitDepth; - #[cfg(all(feature = "asm", target_arch = "aarch64"))] extern "C" { fn dav1d_ipred_z1_fill2_8bpc_neon( @@ -148,7 +147,7 @@ unsafe extern "C" fn ipred_v_c_erased( max_height: c_int, _bitdepth_max: c_int, ) { - ipred_v_rust( + ipred_v_rust::( dst.cast(), stride, topleft.cast(), @@ -157,31 +156,10 @@ unsafe extern "C" fn ipred_v_c_erased( a, max_width, max_height, + BitDepth8::new(()), ); } -unsafe fn ipred_v_rust( - mut dst: *mut pixel, - stride: ptrdiff_t, - topleft: *const pixel, - width: c_int, - height: c_int, - _a: c_int, - _max_width: c_int, - _max_height: c_int, -) { - let mut y = 0; - while y < height { - memcpy( - dst as *mut c_void, - topleft.offset(1) as *const c_void, - width as usize, - ); - dst = dst.offset(stride as isize); - y += 1; - } -} - unsafe extern "C" fn ipred_h_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, From 334d1c1b2a9988ca57509c144952338902f043b7 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Mon, 6 Nov 2023 00:53:58 -0800 Subject: [PATCH 20/49] `fn ipred_h_rust`: Deduplicate w/ generics. 
--- src/ipred.rs | 26 ++++++++++++++++++++++++++ src/ipred_tmpl_16.rs | 24 +++--------------------- src/ipred_tmpl_8.rs | 26 +++----------------------- 3 files changed, 32 insertions(+), 44 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index f2a0684dd..ef2c16857 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -495,6 +495,32 @@ pub(crate) unsafe fn ipred_v_rust( } } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe fn ipred_h_rust( + mut dst: *mut BD::Pixel, + stride: ptrdiff_t, + topleft: *const BD::Pixel, + width: c_int, + height: c_int, + _a: c_int, + _max_width: c_int, + _max_height: c_int, + _bd: BD, +) { + let width = width.try_into().unwrap(); + + let mut y = 0; + while y < height { + BD::pixel_set( + slice::from_raw_parts_mut(dst, width), + *topleft.offset(-(1 + y) as isize), + width, + ); + dst = dst.offset(BD::pxstride(stride as usize) as isize); + y += 1; + } +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 9f407c23d..1604def4e 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -14,6 +14,7 @@ use crate::src::ipred::ipred_dc_128_c_erased; use crate::src::ipred::ipred_dc_c_erased; use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; +use crate::src::ipred::ipred_h_rust; use crate::src::ipred::ipred_v_rust; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; @@ -192,7 +193,7 @@ unsafe extern "C" fn ipred_h_c_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_h_rust( + ipred_h_rust::( dst.cast(), stride, topleft.cast(), @@ -201,29 +202,10 @@ unsafe extern "C" fn ipred_h_c_erased( a, max_width, max_height, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); } -unsafe fn ipred_h_rust( - mut dst: *mut pixel, - stride: ptrdiff_t, - topleft: *const 
pixel, - width: c_int, - height: c_int, - _a: c_int, - _max_width: c_int, - _max_height: c_int, - _bitdepth_max: c_int, -) { - let mut y = 0; - while y < height { - pixel_set(dst, *topleft.offset(-(1 + y) as isize) as c_int, width); - dst = dst.offset(PXSTRIDE(stride) as isize); - y += 1; - } -} - unsafe extern "C" fn ipred_paeth_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 8c1703412..a9786dffa 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -15,6 +15,7 @@ use crate::src::ipred::ipred_dc_128_c_erased; use crate::src::ipred::ipred_dc_c_erased; use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; +use crate::src::ipred::ipred_h_rust; use crate::src::ipred::ipred_v_rust; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; @@ -171,7 +172,7 @@ unsafe extern "C" fn ipred_h_c_erased( max_height: c_int, _bitdepth_max: c_int, ) { - ipred_h_rust( + ipred_h_rust::( dst.cast(), stride, topleft.cast(), @@ -180,31 +181,10 @@ unsafe extern "C" fn ipred_h_c_erased( a, max_width, max_height, + BitDepth8::new(()), ); } -unsafe fn ipred_h_rust( - mut dst: *mut pixel, - stride: ptrdiff_t, - topleft: *const pixel, - width: c_int, - height: c_int, - _a: c_int, - _max_width: c_int, - _max_height: c_int, -) { - let mut y = 0; - while y < height { - memset( - dst as *mut c_void, - *topleft.offset(-(1 + y) as isize) as c_int, - width as usize, - ); - dst = dst.offset(stride as isize); - y += 1; - } -} - unsafe extern "C" fn ipred_paeth_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, From 02147a32aa34ae8ffec01caf2e49c393c2f69264 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Mon, 6 Nov 2023 00:57:08 -0800 Subject: [PATCH 21/49] `fn ipred_paeth_rust`: Deduplicate w/ generics. 
--- src/ipred.rs | 38 ++++++++++++++++++++++++++++++++++++++ src/ipred_tmpl_16.rs | 41 +++-------------------------------------- src/ipred_tmpl_8.rs | 39 +++------------------------------------ 3 files changed, 44 insertions(+), 74 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index ef2c16857..eeaff3290 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -521,6 +521,44 @@ pub(crate) unsafe fn ipred_h_rust( } } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe fn ipred_paeth_rust( + mut dst: *mut BD::Pixel, + stride: ptrdiff_t, + tl_ptr: *const BD::Pixel, + width: c_int, + height: c_int, + _a: c_int, + _max_width: c_int, + _max_height: c_int, + _bd: BD, +) { + let topleft = (*tl_ptr.offset(0)).as_::(); + let mut y = 0; + while y < height { + let left = (*tl_ptr.offset(-(y + 1) as isize)).as_::(); + let mut x = 0; + while x < width { + let top = (*tl_ptr.offset((1 + x) as isize)).as_::(); + let base = left + top - topleft; + let ldiff = (left - base).abs(); + let tdiff = (top - base).abs(); + let tldiff = (topleft - base).abs(); + *dst.offset(x as isize) = (if ldiff <= tdiff && ldiff <= tldiff { + left + } else if tdiff <= tldiff { + top + } else { + topleft + }) + .as_::(); + x += 1; + } + dst = dst.offset(BD::pxstride(stride as usize) as isize); + y += 1; + } +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 1604def4e..a2340afb8 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -15,6 +15,7 @@ use crate::src::ipred::ipred_dc_c_erased; use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; use crate::src::ipred::ipred_h_rust; +use crate::src::ipred::ipred_paeth_rust; use crate::src::ipred::ipred_v_rust; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; @@ -217,7 +218,7 
@@ unsafe extern "C" fn ipred_paeth_c_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_paeth_rust( + ipred_paeth_rust::( dst.cast(), stride, tl_ptr.cast(), @@ -226,46 +227,10 @@ unsafe extern "C" fn ipred_paeth_c_erased( a, max_width, max_height, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); } -unsafe fn ipred_paeth_rust( - mut dst: *mut pixel, - stride: ptrdiff_t, - tl_ptr: *const pixel, - width: c_int, - height: c_int, - _a: c_int, - _max_width: c_int, - _max_height: c_int, - _bitdepth_max: c_int, -) { - let topleft = *tl_ptr.offset(0) as c_int; - let mut y = 0; - while y < height { - let left = *tl_ptr.offset(-(y + 1) as isize) as c_int; - let mut x = 0; - while x < width { - let top = *tl_ptr.offset((1 + x) as isize) as c_int; - let base = left + top - topleft; - let ldiff = (left - base).abs(); - let tdiff = (top - base).abs(); - let tldiff = (topleft - base).abs(); - *dst.offset(x as isize) = (if ldiff <= tdiff && ldiff <= tldiff { - left - } else if tdiff <= tldiff { - top - } else { - topleft - }) as pixel; - x += 1; - } - dst = dst.offset(PXSTRIDE(stride) as isize); - y += 1; - } -} - unsafe extern "C" fn ipred_smooth_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index a9786dffa..975424597 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -16,6 +16,7 @@ use crate::src::ipred::ipred_dc_c_erased; use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; use crate::src::ipred::ipred_h_rust; +use crate::src::ipred::ipred_paeth_rust; use crate::src::ipred::ipred_v_rust; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; @@ -196,7 +197,7 @@ unsafe extern "C" fn ipred_paeth_c_erased( max_height: c_int, _bitdepth_max: c_int, ) { - ipred_paeth_rust( + ipred_paeth_rust::( dst.cast(), stride, tl_ptr.cast(), @@ -205,44 +206,10 @@ unsafe extern "C" fn ipred_paeth_c_erased( a, max_width, max_height, + 
BitDepth8::new(()), ); } -unsafe fn ipred_paeth_rust( - mut dst: *mut pixel, - stride: ptrdiff_t, - tl_ptr: *const pixel, - width: c_int, - height: c_int, - _a: c_int, - _max_width: c_int, - _max_height: c_int, -) { - let topleft = *tl_ptr.offset(0) as c_int; - let mut y = 0; - while y < height { - let left = *tl_ptr.offset(-(y + 1) as isize) as c_int; - let mut x = 0; - while x < width { - let top = *tl_ptr.offset((1 + x) as isize) as c_int; - let base = left + top - topleft; - let ldiff = (left - base).abs(); - let tdiff = (top - base).abs(); - let tldiff = (topleft - base).abs(); - *dst.offset(x as isize) = (if ldiff <= tdiff && ldiff <= tldiff { - left - } else if tdiff <= tldiff { - top - } else { - topleft - }) as pixel; - x += 1; - } - dst = dst.offset(stride as isize); - y += 1; - } -} - unsafe extern "C" fn ipred_smooth_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, From 54a92fd0647cbc7a8d353685c9203cbaa32f3875 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Mon, 6 Nov 2023 01:01:59 -0800 Subject: [PATCH 22/49] `fn ipred_smooth_rust`: Deduplicate w/ generics. 
--- src/ipred.rs | 35 +++++++++++++++++++++++++++++++++++ src/ipred_tmpl_16.rs | 38 +++----------------------------------- src/ipred_tmpl_8.rs | 36 +++--------------------------------- 3 files changed, 41 insertions(+), 68 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index eeaff3290..ccf17425a 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -4,6 +4,7 @@ use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::DynPixel; use crate::include::common::bitdepth::BPC; use crate::include::common::intops::apply_sign; +use crate::src::tables::dav1d_sm_weights; use cfg_if::cfg_if; use libc::ptrdiff_t; use std::ffi::c_int; @@ -559,6 +560,40 @@ pub(crate) unsafe fn ipred_paeth_rust( } } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe fn ipred_smooth_rust( + mut dst: *mut BD::Pixel, + stride: ptrdiff_t, + topleft: *const BD::Pixel, + width: c_int, + height: c_int, + _a: c_int, + _max_width: c_int, + _max_height: c_int, + _bd: BD, +) { + let weights_hor: *const u8 = &*dav1d_sm_weights.0.as_ptr().offset(width as isize) as *const u8; + let weights_ver: *const u8 = &*dav1d_sm_weights.0.as_ptr().offset(height as isize) as *const u8; + let right = (*topleft.offset(width as isize)).as_::(); + let bottom = (*topleft.offset(-height as isize)).as_::(); + let mut y = 0; + while y < height { + let mut x = 0; + while x < width { + let pred = *weights_ver.offset(y as isize) as c_int + * (*topleft.offset((1 + x) as isize)).as_::() + + (256 - *weights_ver.offset(y as isize) as c_int) * bottom + + *weights_hor.offset(x as isize) as c_int + * (*topleft.offset(-(1 + y) as isize)).as_::() + + (256 - *weights_hor.offset(x as isize) as c_int) * right; + *dst.offset(x as isize) = (pred + 256 >> 9).as_::(); + x += 1; + } + dst = dst.offset(BD::pxstride(stride as usize) as isize); + y += 1; + } +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: 
c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index a2340afb8..367249316 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -16,6 +16,7 @@ use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; use crate::src::ipred::ipred_h_rust; use crate::src::ipred::ipred_paeth_rust; +use crate::src::ipred::ipred_smooth_rust; use crate::src::ipred::ipred_v_rust; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; @@ -242,7 +243,7 @@ unsafe extern "C" fn ipred_smooth_c_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_smooth_rust( + ipred_smooth_rust::( dst.cast(), stride, topleft.cast(), @@ -251,43 +252,10 @@ unsafe extern "C" fn ipred_smooth_c_erased( a, max_width, max_height, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); } -unsafe fn ipred_smooth_rust( - mut dst: *mut pixel, - stride: ptrdiff_t, - topleft: *const pixel, - width: c_int, - height: c_int, - _a: c_int, - _max_width: c_int, - _max_height: c_int, - _bitdepth_max: c_int, -) { - let weights_hor: *const u8 = &*dav1d_sm_weights.0.as_ptr().offset(width as isize) as *const u8; - let weights_ver: *const u8 = &*dav1d_sm_weights.0.as_ptr().offset(height as isize) as *const u8; - let right = *topleft.offset(width as isize) as c_int; - let bottom = *topleft.offset(-height as isize) as c_int; - let mut y = 0; - while y < height { - let mut x = 0; - while x < width { - let pred = *weights_ver.offset(y as isize) as c_int - * *topleft.offset((1 + x) as isize) as c_int - + (256 - *weights_ver.offset(y as isize) as c_int) * bottom - + *weights_hor.offset(x as isize) as c_int - * *topleft.offset(-(1 + y) as isize) as c_int - + (256 - *weights_hor.offset(x as isize) as c_int) * right; - *dst.offset(x as isize) = (pred + 256 >> 9) as pixel; - x += 1; - } - dst = dst.offset(PXSTRIDE(stride) as isize); - y += 1; - } -} - unsafe extern "C" fn ipred_smooth_v_c_erased( dst: *mut DynPixel, 
stride: ptrdiff_t, diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 975424597..926d503b3 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -17,6 +17,7 @@ use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; use crate::src::ipred::ipred_h_rust; use crate::src::ipred::ipred_paeth_rust; +use crate::src::ipred::ipred_smooth_rust; use crate::src::ipred::ipred_v_rust; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; @@ -221,7 +222,7 @@ unsafe extern "C" fn ipred_smooth_c_erased( max_height: c_int, _bitdepth_max: c_int, ) { - ipred_smooth_rust( + ipred_smooth_rust::( dst.cast(), stride, topleft.cast(), @@ -230,41 +231,10 @@ unsafe extern "C" fn ipred_smooth_c_erased( a, max_width, max_height, + BitDepth8::new(()), ); } -unsafe fn ipred_smooth_rust( - mut dst: *mut pixel, - stride: ptrdiff_t, - topleft: *const pixel, - width: c_int, - height: c_int, - _a: c_int, - _max_width: c_int, - _max_height: c_int, -) { - let weights_hor: *const u8 = &*dav1d_sm_weights.0.as_ptr().offset(width as isize) as *const u8; - let weights_ver: *const u8 = &*dav1d_sm_weights.0.as_ptr().offset(height as isize) as *const u8; - let right = *topleft.offset(width as isize) as c_int; - let bottom = *topleft.offset(-height as isize) as c_int; - let mut y = 0; - while y < height { - let mut x = 0; - while x < width { - let pred = *weights_ver.offset(y as isize) as c_int - * *topleft.offset((1 + x) as isize) as c_int - + (256 - *weights_ver.offset(y as isize) as c_int) * bottom - + *weights_hor.offset(x as isize) as c_int - * *topleft.offset(-(1 + y) as isize) as c_int - + (256 - *weights_hor.offset(x as isize) as c_int) * right; - *dst.offset(x as isize) = (pred + 256 >> 9) as pixel; - x += 1; - } - dst = dst.offset(stride as isize); - y += 1; - } -} - unsafe extern "C" fn ipred_smooth_v_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, From e686a303a9b37994753c7babd7a74348b76c36df Mon Sep 17 00:00:00 
2001 From: Khyber Sen Date: Mon, 6 Nov 2023 01:06:33 -0800 Subject: [PATCH 23/49] `fn ipred_smooth_v_rust`: Deduplicate w/ generics. --- src/ipred.rs | 29 +++++++++++++++++++++++++++++ src/ipred_tmpl_16.rs | 33 +++------------------------------ src/ipred_tmpl_8.rs | 31 +++---------------------------- 3 files changed, 35 insertions(+), 58 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index ccf17425a..ae690da6f 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -594,6 +594,35 @@ pub(crate) unsafe fn ipred_smooth_rust( } } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe fn ipred_smooth_v_rust( + mut dst: *mut BD::Pixel, + stride: ptrdiff_t, + topleft: *const BD::Pixel, + width: c_int, + height: c_int, + _a: c_int, + _max_width: c_int, + _max_height: c_int, + _bd: BD, +) { + let weights_ver: *const u8 = &*dav1d_sm_weights.0.as_ptr().offset(height as isize) as *const u8; + let bottom = (*topleft.offset(-height as isize)).as_::(); + let mut y = 0; + while y < height { + let mut x = 0; + while x < width { + let pred = *weights_ver.offset(y as isize) as c_int + * (*topleft.offset((1 + x) as isize)).as_::() + + (256 - *weights_ver.offset(y as isize) as c_int) * bottom; + *dst.offset(x as isize) = (pred + 128 >> 8).as_::(); + x += 1; + } + dst = dst.offset(BD::pxstride(stride as usize) as isize); + y += 1; + } +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 367249316..12ace9719 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -17,6 +17,7 @@ use crate::src::ipred::ipred_dc_top_c_erased; use crate::src::ipred::ipred_h_rust; use crate::src::ipred::ipred_paeth_rust; use crate::src::ipred::ipred_smooth_rust; +use crate::src::ipred::ipred_smooth_v_rust; use crate::src::ipred::ipred_v_rust; use crate::src::ipred::Rav1dIntraPredDSPContext; use
crate::src::levels::DC_128_PRED; @@ -267,7 +268,7 @@ unsafe extern "C" fn ipred_smooth_v_c_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_smooth_v_rust( + ipred_smooth_v_rust::( dst.cast(), stride, topleft.cast(), @@ -276,38 +277,10 @@ unsafe extern "C" fn ipred_smooth_v_c_erased( a, max_width, max_height, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); } -unsafe fn ipred_smooth_v_rust( - mut dst: *mut pixel, - stride: ptrdiff_t, - topleft: *const pixel, - width: c_int, - height: c_int, - _a: c_int, - _max_width: c_int, - _max_height: c_int, - _bitdepth_max: c_int, -) { - let weights_ver: *const u8 = &*dav1d_sm_weights.0.as_ptr().offset(height as isize) as *const u8; - let bottom = *topleft.offset(-height as isize) as c_int; - let mut y = 0; - while y < height { - let mut x = 0; - while x < width { - let pred = *weights_ver.offset(y as isize) as c_int - * *topleft.offset((1 + x) as isize) as c_int - + (256 - *weights_ver.offset(y as isize) as c_int) * bottom; - *dst.offset(x as isize) = (pred + 128 >> 8) as pixel; - x += 1; - } - dst = dst.offset(PXSTRIDE(stride) as isize); - y += 1; - } -} - unsafe extern "C" fn ipred_smooth_h_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 926d503b3..1c5f48e51 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -18,6 +18,7 @@ use crate::src::ipred::ipred_dc_top_c_erased; use crate::src::ipred::ipred_h_rust; use crate::src::ipred::ipred_paeth_rust; use crate::src::ipred::ipred_smooth_rust; +use crate::src::ipred::ipred_smooth_v_rust; use crate::src::ipred::ipred_v_rust; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; @@ -246,7 +247,7 @@ unsafe extern "C" fn ipred_smooth_v_c_erased( max_height: c_int, _bitdepth_max: c_int, ) { - ipred_smooth_v_rust( + ipred_smooth_v_rust::( dst.cast(), stride, topleft.cast(), @@ -255,36 +256,10 @@ unsafe extern "C" fn ipred_smooth_v_c_erased( a, max_width, max_height, + 
BitDepth8::new(()), ); } -unsafe fn ipred_smooth_v_rust( - mut dst: *mut pixel, - stride: ptrdiff_t, - topleft: *const pixel, - width: c_int, - height: c_int, - _a: c_int, - _max_width: c_int, - _max_height: c_int, -) { - let weights_ver: *const u8 = &*dav1d_sm_weights.0.as_ptr().offset(height as isize) as *const u8; - let bottom = *topleft.offset(-height as isize) as c_int; - let mut y = 0; - while y < height { - let mut x = 0; - while x < width { - let pred = *weights_ver.offset(y as isize) as c_int - * *topleft.offset((1 + x) as isize) as c_int - + (256 - *weights_ver.offset(y as isize) as c_int) * bottom; - *dst.offset(x as isize) = (pred + 128 >> 8) as pixel; - x += 1; - } - dst = dst.offset(stride as isize); - y += 1; - } -} - unsafe extern "C" fn ipred_smooth_h_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, From 72a3519c14a1f49111baa6a82b13065b579a13ec Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Mon, 6 Nov 2023 01:11:23 -0800 Subject: [PATCH 24/49] `fn ipred_smooth_h_rust`: Deduplicate w/ generics. 
--- src/ipred.rs | 29 +++++++++++++++++++++++++++++ src/ipred_tmpl_16.rs | 34 +++------------------------------- src/ipred_tmpl_8.rs | 32 +++----------------------------- 3 files changed, 35 insertions(+), 60 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index ae690da6f..a54926ee9 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -623,6 +623,35 @@ pub(crate) unsafe fn ipred_smooth_v_rust( } } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe fn ipred_smooth_h_rust( + mut dst: *mut BD::Pixel, + stride: ptrdiff_t, + topleft: *const BD::Pixel, + width: c_int, + height: c_int, + _a: c_int, + _max_width: c_int, + _max_height: c_int, + _bd: BD, +) { + let weights_hor: *const u8 = &*dav1d_sm_weights.0.as_ptr().offset(width as isize) as *const u8; + let right = (*topleft.offset(width as isize)).as_::(); + let mut y = 0; + while y < height { + let mut x = 0; + while x < width { + let pred = *weights_hor.offset(x as isize) as c_int + * (*topleft.offset(-(y + 1) as isize)).as_::() + + (256 - *weights_hor.offset(x as isize) as c_int) * right; + *dst.offset(x as isize) = (pred + 128 >> 8).as_::(); + x += 1; + } + dst = dst.offset(BD::pxstride(stride as usize) as isize); + y += 1; + } +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 12ace9719..711826b12 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -16,6 +16,7 @@ use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; use crate::src::ipred::ipred_h_rust; use crate::src::ipred::ipred_paeth_rust; +use crate::src::ipred::ipred_smooth_h_rust; use crate::src::ipred::ipred_smooth_rust; use crate::src::ipred::ipred_smooth_v_rust; use crate::src::ipred::ipred_v_rust; @@ -36,7 +37,6 @@ use crate::src::levels::Z2_PRED; use crate::src::levels::Z3_PRED; use 
crate::src::tables::dav1d_dr_intra_derivative; use crate::src::tables::dav1d_filter_intra_taps; -use crate::src::tables::dav1d_sm_weights; use libc::memcpy; use libc::ptrdiff_t; use std::cmp; @@ -292,7 +292,7 @@ unsafe extern "C" fn ipred_smooth_h_c_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_smooth_h_rust( + ipred_smooth_h_rust::( dst.cast(), stride, topleft.cast(), @@ -301,38 +301,10 @@ unsafe extern "C" fn ipred_smooth_h_c_erased( a, max_width, max_height, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); } -unsafe fn ipred_smooth_h_rust( - mut dst: *mut pixel, - stride: ptrdiff_t, - topleft: *const pixel, - width: c_int, - height: c_int, - _a: c_int, - _max_width: c_int, - _max_height: c_int, - _bitdepth_max: c_int, -) { - let weights_hor: *const u8 = &*dav1d_sm_weights.0.as_ptr().offset(width as isize) as *const u8; - let right = *topleft.offset(width as isize) as c_int; - let mut y = 0; - while y < height { - let mut x = 0; - while x < width { - let pred = *weights_hor.offset(x as isize) as c_int - * *topleft.offset(-(y + 1) as isize) as c_int - + (256 - *weights_hor.offset(x as isize) as c_int) * right; - *dst.offset(x as isize) = (pred + 128 >> 8) as pixel; - x += 1; - } - dst = dst.offset(PXSTRIDE(stride) as isize); - y += 1; - } -} - #[inline(never)] unsafe fn filter_edge( out: *mut pixel, diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 1c5f48e51..0a40dd7db 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -17,6 +17,7 @@ use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; use crate::src::ipred::ipred_h_rust; use crate::src::ipred::ipred_paeth_rust; +use crate::src::ipred::ipred_smooth_h_rust; use crate::src::ipred::ipred_smooth_rust; use crate::src::ipred::ipred_smooth_v_rust; use crate::src::ipred::ipred_v_rust; @@ -37,7 +38,6 @@ use crate::src::levels::Z2_PRED; use crate::src::levels::Z3_PRED; use crate::src::tables::dav1d_dr_intra_derivative; use 
crate::src::tables::dav1d_filter_intra_taps; -use crate::src::tables::dav1d_sm_weights; use libc::memcpy; use libc::memset; use libc::ptrdiff_t; @@ -271,7 +271,7 @@ unsafe extern "C" fn ipred_smooth_h_c_erased( max_height: c_int, _bitdepth_max: c_int, ) { - ipred_smooth_h_rust( + ipred_smooth_h_rust::( dst.cast(), stride, topleft.cast(), @@ -280,36 +280,10 @@ unsafe extern "C" fn ipred_smooth_h_c_erased( a, max_width, max_height, + BitDepth8::new(()), ); } -unsafe fn ipred_smooth_h_rust( - mut dst: *mut pixel, - stride: ptrdiff_t, - topleft: *const pixel, - width: c_int, - height: c_int, - _a: c_int, - _max_width: c_int, - _max_height: c_int, -) { - let weights_hor: *const u8 = &*dav1d_sm_weights.0.as_ptr().offset(width as isize) as *const u8; - let right = *topleft.offset(width as isize) as c_int; - let mut y = 0; - while y < height { - let mut x = 0; - while x < width { - let pred = *weights_hor.offset(x as isize) as c_int - * *topleft.offset(-(y + 1) as isize) as c_int - + (256 - *weights_hor.offset(x as isize) as c_int) * right; - *dst.offset(x as isize) = (pred + 128 >> 8) as pixel; - x += 1; - } - dst = dst.offset(stride as isize); - y += 1; - } -} - #[inline(never)] unsafe fn filter_edge( out: *mut pixel, From 2762c1efb317a78d54e29d46aaccfb586c3e4c42 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Mon, 6 Nov 2023 01:24:21 -0800 Subject: [PATCH 25/49] `fn ipred_{v,h,paeth,smooth{,_v,_h}}_c_erased`: Deduplicate w/ generics. 
--- src/ipred.rs | 158 ++++++++++++++++++++++++++++++++++++++-- src/ipred_tmpl_16.rs | 168 ++++--------------------------------------- src/ipred_tmpl_8.rs | 168 ++++--------------------------------------- 3 files changed, 175 insertions(+), 319 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index a54926ee9..3302b1cdf 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -470,8 +470,7 @@ pub(crate) unsafe extern "C" fn ipred_cfl_128_c_erased( cfl_pred::(dst.cast(), stride, width, height, dc, ac, alpha, bd); } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe fn ipred_v_rust( +unsafe fn ipred_v_rust( mut dst: *mut BD::Pixel, stride: ptrdiff_t, topleft: *const BD::Pixel, @@ -497,7 +496,31 @@ pub(crate) unsafe fn ipred_v_rust( } // TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe fn ipred_h_rust( +pub(crate) unsafe extern "C" fn ipred_v_c_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + topleft: *const DynPixel, + width: c_int, + height: c_int, + a: c_int, + max_width: c_int, + max_height: c_int, + bitdepth_max: c_int, +) { + ipred_v_rust::( + dst.cast(), + stride, + topleft.cast(), + width, + height, + a, + max_width, + max_height, + BD::from_c(bitdepth_max), + ); +} + +unsafe fn ipred_h_rust( mut dst: *mut BD::Pixel, stride: ptrdiff_t, topleft: *const BD::Pixel, @@ -523,7 +546,31 @@ pub(crate) unsafe fn ipred_h_rust( } // TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe fn ipred_paeth_rust( +pub(crate) unsafe extern "C" fn ipred_h_c_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + topleft: *const DynPixel, + width: c_int, + height: c_int, + a: c_int, + max_width: c_int, + max_height: c_int, + bitdepth_max: c_int, +) { + ipred_h_rust::( + dst.cast(), + stride, + topleft.cast(), + width, + height, + a, + max_width, + max_height, + BD::from_c(bitdepth_max), + ); +} + +unsafe fn ipred_paeth_rust( mut dst: *mut BD::Pixel, stride: ptrdiff_t, tl_ptr: *const BD::Pixel, @@ -561,7 +608,31 @@ 
pub(crate) unsafe fn ipred_paeth_rust( } // TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe fn ipred_smooth_rust( +pub(crate) unsafe extern "C" fn ipred_paeth_c_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + tl_ptr: *const DynPixel, + width: c_int, + height: c_int, + a: c_int, + max_width: c_int, + max_height: c_int, + bitdepth_max: c_int, +) { + ipred_paeth_rust::( + dst.cast(), + stride, + tl_ptr.cast(), + width, + height, + a, + max_width, + max_height, + BD::from_c(bitdepth_max), + ); +} + +unsafe fn ipred_smooth_rust( mut dst: *mut BD::Pixel, stride: ptrdiff_t, topleft: *const BD::Pixel, @@ -595,7 +666,31 @@ pub(crate) unsafe fn ipred_smooth_rust( } // TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe fn ipred_smooth_v_rust( +pub(crate) unsafe extern "C" fn ipred_smooth_c_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + topleft: *const DynPixel, + width: c_int, + height: c_int, + a: c_int, + max_width: c_int, + max_height: c_int, + bitdepth_max: c_int, +) { + ipred_smooth_rust::( + dst.cast(), + stride, + topleft.cast(), + width, + height, + a, + max_width, + max_height, + BD::from_c(bitdepth_max), + ); +} + +unsafe fn ipred_smooth_v_rust( mut dst: *mut BD::Pixel, stride: ptrdiff_t, topleft: *const BD::Pixel, @@ -624,7 +719,31 @@ pub(crate) unsafe fn ipred_smooth_v_rust( } // TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe fn ipred_smooth_h_rust( +pub(crate) unsafe extern "C" fn ipred_smooth_v_c_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + topleft: *const DynPixel, + width: c_int, + height: c_int, + a: c_int, + max_width: c_int, + max_height: c_int, + bitdepth_max: c_int, +) { + ipred_smooth_v_rust::( + dst.cast(), + stride, + topleft.cast(), + width, + height, + a, + max_width, + max_height, + BD::from_c(bitdepth_max), + ); +} + +unsafe fn ipred_smooth_h_rust( mut dst: *mut BD::Pixel, stride: ptrdiff_t, topleft: *const BD::Pixel, @@ -652,6 +771,31 @@ pub(crate) 
unsafe fn ipred_smooth_h_rust( } } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe extern "C" fn ipred_smooth_h_c_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + topleft: *const DynPixel, + width: c_int, + height: c_int, + a: c_int, + max_width: c_int, + max_height: c_int, + bitdepth_max: c_int, +) { + ipred_smooth_h_rust::( + dst.cast(), + stride, + topleft.cast(), + width, + height, + a, + max_width, + max_height, + BD::from_c(bitdepth_max), + ); +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 711826b12..c75372bd2 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -14,12 +14,12 @@ use crate::src::ipred::ipred_dc_128_c_erased; use crate::src::ipred::ipred_dc_c_erased; use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; -use crate::src::ipred::ipred_h_rust; -use crate::src::ipred::ipred_paeth_rust; -use crate::src::ipred::ipred_smooth_h_rust; -use crate::src::ipred::ipred_smooth_rust; -use crate::src::ipred::ipred_smooth_v_rust; -use crate::src::ipred::ipred_v_rust; +use crate::src::ipred::ipred_h_c_erased; +use crate::src::ipred::ipred_paeth_c_erased; +use crate::src::ipred::ipred_smooth_c_erased; +use crate::src::ipred::ipred_smooth_h_c_erased; +use crate::src::ipred::ipred_smooth_v_c_erased; +use crate::src::ipred::ipred_v_c_erased; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; use crate::src::levels::DC_PRED; @@ -161,150 +161,6 @@ unsafe fn pixel_set(dst: *mut pixel, val: c_int, num: c_int) { } } -unsafe extern "C" fn ipred_v_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - a: c_int, - max_width: c_int, - max_height: c_int, - bitdepth_max: c_int, -) { - ipred_v_rust::( - dst.cast(), - 
stride, - topleft.cast(), - width, - height, - a, - max_width, - max_height, - BitDepth16::from_c(bitdepth_max), - ); -} - -unsafe extern "C" fn ipred_h_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - a: c_int, - max_width: c_int, - max_height: c_int, - bitdepth_max: c_int, -) { - ipred_h_rust::( - dst.cast(), - stride, - topleft.cast(), - width, - height, - a, - max_width, - max_height, - BitDepth16::from_c(bitdepth_max), - ); -} - -unsafe extern "C" fn ipred_paeth_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - tl_ptr: *const DynPixel, - width: c_int, - height: c_int, - a: c_int, - max_width: c_int, - max_height: c_int, - bitdepth_max: c_int, -) { - ipred_paeth_rust::( - dst.cast(), - stride, - tl_ptr.cast(), - width, - height, - a, - max_width, - max_height, - BitDepth16::from_c(bitdepth_max), - ); -} - -unsafe extern "C" fn ipred_smooth_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - a: c_int, - max_width: c_int, - max_height: c_int, - bitdepth_max: c_int, -) { - ipred_smooth_rust::( - dst.cast(), - stride, - topleft.cast(), - width, - height, - a, - max_width, - max_height, - BitDepth16::from_c(bitdepth_max), - ); -} - -unsafe extern "C" fn ipred_smooth_v_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - a: c_int, - max_width: c_int, - max_height: c_int, - bitdepth_max: c_int, -) { - ipred_smooth_v_rust::( - dst.cast(), - stride, - topleft.cast(), - width, - height, - a, - max_width, - max_height, - BitDepth16::from_c(bitdepth_max), - ); -} - -unsafe extern "C" fn ipred_smooth_h_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - a: c_int, - max_width: c_int, - max_height: c_int, - bitdepth_max: c_int, -) { - ipred_smooth_h_rust::( - dst.cast(), - stride, - topleft.cast(), - width, - height, 
- a, - max_width, - max_height, - BitDepth16::from_c(bitdepth_max), - ); -} - #[inline(never)] unsafe fn filter_edge( out: *mut pixel, @@ -1603,12 +1459,12 @@ pub unsafe fn rav1d_intra_pred_dsp_init_16bpc(c: *mut Rav1dIntraPredDSPContext) (*c).intra_pred[DC_128_PRED as usize] = Some(ipred_dc_128_c_erased::); (*c).intra_pred[TOP_DC_PRED as usize] = Some(ipred_dc_top_c_erased::); (*c).intra_pred[LEFT_DC_PRED as usize] = Some(ipred_dc_left_c_erased::); - (*c).intra_pred[HOR_PRED as usize] = Some(ipred_h_c_erased); - (*c).intra_pred[VERT_PRED as usize] = Some(ipred_v_c_erased); - (*c).intra_pred[PAETH_PRED as usize] = Some(ipred_paeth_c_erased); - (*c).intra_pred[SMOOTH_PRED as usize] = Some(ipred_smooth_c_erased); - (*c).intra_pred[SMOOTH_V_PRED as usize] = Some(ipred_smooth_v_c_erased); - (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(ipred_smooth_h_c_erased); + (*c).intra_pred[HOR_PRED as usize] = Some(ipred_h_c_erased::); + (*c).intra_pred[VERT_PRED as usize] = Some(ipred_v_c_erased::); + (*c).intra_pred[PAETH_PRED as usize] = Some(ipred_paeth_c_erased::); + (*c).intra_pred[SMOOTH_PRED as usize] = Some(ipred_smooth_c_erased::); + (*c).intra_pred[SMOOTH_V_PRED as usize] = Some(ipred_smooth_v_c_erased::); + (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(ipred_smooth_h_c_erased::); (*c).intra_pred[Z1_PRED as usize] = Some(ipred_z1_c_erased); (*c).intra_pred[Z2_PRED as usize] = Some(ipred_z2_c_erased); (*c).intra_pred[Z3_PRED as usize] = Some(ipred_z3_c_erased); diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 0a40dd7db..aef94a7ad 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -15,12 +15,12 @@ use crate::src::ipred::ipred_dc_128_c_erased; use crate::src::ipred::ipred_dc_c_erased; use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; -use crate::src::ipred::ipred_h_rust; -use crate::src::ipred::ipred_paeth_rust; -use crate::src::ipred::ipred_smooth_h_rust; -use crate::src::ipred::ipred_smooth_rust; -use 
crate::src::ipred::ipred_smooth_v_rust; -use crate::src::ipred::ipred_v_rust; +use crate::src::ipred::ipred_h_c_erased; +use crate::src::ipred::ipred_paeth_c_erased; +use crate::src::ipred::ipred_smooth_c_erased; +use crate::src::ipred::ipred_smooth_h_c_erased; +use crate::src::ipred::ipred_smooth_v_c_erased; +use crate::src::ipred::ipred_v_c_erased; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; use crate::src::levels::DC_PRED; @@ -140,150 +140,6 @@ extern "C" { pub type pixel = u8; -unsafe extern "C" fn ipred_v_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - a: c_int, - max_width: c_int, - max_height: c_int, - _bitdepth_max: c_int, -) { - ipred_v_rust::( - dst.cast(), - stride, - topleft.cast(), - width, - height, - a, - max_width, - max_height, - BitDepth8::new(()), - ); -} - -unsafe extern "C" fn ipred_h_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - a: c_int, - max_width: c_int, - max_height: c_int, - _bitdepth_max: c_int, -) { - ipred_h_rust::( - dst.cast(), - stride, - topleft.cast(), - width, - height, - a, - max_width, - max_height, - BitDepth8::new(()), - ); -} - -unsafe extern "C" fn ipred_paeth_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - tl_ptr: *const DynPixel, - width: c_int, - height: c_int, - a: c_int, - max_width: c_int, - max_height: c_int, - _bitdepth_max: c_int, -) { - ipred_paeth_rust::( - dst.cast(), - stride, - tl_ptr.cast(), - width, - height, - a, - max_width, - max_height, - BitDepth8::new(()), - ); -} - -unsafe extern "C" fn ipred_smooth_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - a: c_int, - max_width: c_int, - max_height: c_int, - _bitdepth_max: c_int, -) { - ipred_smooth_rust::( - dst.cast(), - stride, - topleft.cast(), - width, - height, - a, - max_width, - max_height, - 
BitDepth8::new(()), - ); -} - -unsafe extern "C" fn ipred_smooth_v_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - a: c_int, - max_width: c_int, - max_height: c_int, - _bitdepth_max: c_int, -) { - ipred_smooth_v_rust::( - dst.cast(), - stride, - topleft.cast(), - width, - height, - a, - max_width, - max_height, - BitDepth8::new(()), - ); -} - -unsafe extern "C" fn ipred_smooth_h_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft: *const DynPixel, - width: c_int, - height: c_int, - a: c_int, - max_width: c_int, - max_height: c_int, - _bitdepth_max: c_int, -) { - ipred_smooth_h_rust::( - dst.cast(), - stride, - topleft.cast(), - width, - height, - a, - max_width, - max_height, - BitDepth8::new(()), - ); -} - #[inline(never)] unsafe fn filter_edge( out: *mut pixel, @@ -1549,12 +1405,12 @@ pub unsafe fn rav1d_intra_pred_dsp_init_8bpc(c: *mut Rav1dIntraPredDSPContext) { (*c).intra_pred[DC_128_PRED as usize] = Some(ipred_dc_128_c_erased::); (*c).intra_pred[TOP_DC_PRED as usize] = Some(ipred_dc_top_c_erased::); (*c).intra_pred[LEFT_DC_PRED as usize] = Some(ipred_dc_left_c_erased::); - (*c).intra_pred[HOR_PRED as usize] = Some(ipred_h_c_erased); - (*c).intra_pred[VERT_PRED as usize] = Some(ipred_v_c_erased); - (*c).intra_pred[PAETH_PRED as usize] = Some(ipred_paeth_c_erased); - (*c).intra_pred[SMOOTH_PRED as usize] = Some(ipred_smooth_c_erased); - (*c).intra_pred[SMOOTH_V_PRED as usize] = Some(ipred_smooth_v_c_erased); - (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(ipred_smooth_h_c_erased); + (*c).intra_pred[HOR_PRED as usize] = Some(ipred_h_c_erased::); + (*c).intra_pred[VERT_PRED as usize] = Some(ipred_v_c_erased::); + (*c).intra_pred[PAETH_PRED as usize] = Some(ipred_paeth_c_erased::); + (*c).intra_pred[SMOOTH_PRED as usize] = Some(ipred_smooth_c_erased::); + (*c).intra_pred[SMOOTH_V_PRED as usize] = Some(ipred_smooth_v_c_erased::); + (*c).intra_pred[SMOOTH_H_PRED as usize] = 
Some(ipred_smooth_h_c_erased::); (*c).intra_pred[Z1_PRED as usize] = Some(ipred_z1_c_erased); (*c).intra_pred[Z2_PRED as usize] = Some(ipred_z2_c_erased); (*c).intra_pred[Z3_PRED as usize] = Some(ipred_z3_c_erased); From 3f2234feabc5474b85de3d549e405a8b0a1aa5fd Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Mon, 6 Nov 2023 22:20:46 -0800 Subject: [PATCH 26/49] `fn filter_edge`: Deduplicate w/ generics. --- src/ipred.rs | 40 ++++++++++++++++++++++++++++++++++++++ src/ipred_tmpl_16.rs | 46 +++++--------------------------------------- src/ipred_tmpl_8.rs | 46 +++++--------------------------------------- 3 files changed, 50 insertions(+), 82 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 3302b1cdf..ca7ad2b11 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -4,9 +4,11 @@ use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::DynPixel; use crate::include::common::bitdepth::BPC; use crate::include::common::intops::apply_sign; +use crate::include::common::intops::iclip; use crate::src::tables::dav1d_sm_weights; use cfg_if::cfg_if; use libc::ptrdiff_t; +use std::cmp; use std::ffi::c_int; use std::ffi::c_uint; use std::ffi::c_ulong; @@ -853,6 +855,44 @@ pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) return 0 as c_int; } +// TODO(kkysen) Temporarily pub until mod is deduplicated +#[inline(never)] +pub(crate) unsafe fn filter_edge( + out: *mut BD::Pixel, + sz: c_int, + lim_from: c_int, + lim_to: c_int, + in_0: *const BD::Pixel, + from: c_int, + to: c_int, + strength: c_int, +) { + static kernel: [[u8; 5]; 3] = [[0, 4, 8, 4, 0], [0, 5, 6, 5, 0], [2, 4, 4, 4, 2]]; + if !(strength > 0) { + unreachable!(); + } + let mut i = 0; + while i < cmp::min(sz, lim_from) { + *out.offset(i as isize) = *in_0.offset(iclip(i, from, to - 1) as isize); + i += 1; + } + while i < cmp::min(lim_to, sz) { + let mut s = 0; + let mut j = 0; + while j < 5 { + s += (*in_0.offset(iclip(i - 2 + j, from, to - 1) as isize)).as_::() + 
* kernel[(strength - 1) as usize][j as usize] as c_int; + j += 1; + } + *out.offset(i as isize) = (s + 8 >> 4).as_::(); + i += 1; + } + while i < sz { + *out.offset(i as isize) = *in_0.offset(iclip(i, from, to - 1) as isize); + i += 1; + } +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline] pub(crate) unsafe fn get_upsample(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index c75372bd2..624a09e35 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -4,6 +4,7 @@ use crate::include::common::bitdepth::BitDepth16; use crate::include::common::bitdepth::DynPixel; use crate::include::common::intops::iclip; use crate::include::dav1d::headers::Rav1dPixelLayout; +use crate::src::ipred::filter_edge; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; use crate::src::ipred::ipred_cfl_128_c_erased; @@ -161,43 +162,6 @@ unsafe fn pixel_set(dst: *mut pixel, val: c_int, num: c_int) { } } -#[inline(never)] -unsafe fn filter_edge( - out: *mut pixel, - sz: c_int, - lim_from: c_int, - lim_to: c_int, - in_0: *const pixel, - from: c_int, - to: c_int, - strength: c_int, -) { - static kernel: [[u8; 5]; 3] = [[0, 4, 8, 4, 0], [0, 5, 6, 5, 0], [2, 4, 4, 4, 2]]; - if !(strength > 0) { - unreachable!(); - } - let mut i = 0; - while i < cmp::min(sz, lim_from) { - *out.offset(i as isize) = *in_0.offset(iclip(i, from, to - 1) as isize); - i += 1; - } - while i < cmp::min(lim_to, sz) { - let mut s = 0; - let mut j = 0; - while j < 5 { - s += *in_0.offset(iclip(i - 2 + j, from, to - 1) as isize) as c_int - * kernel[(strength - 1) as usize][j as usize] as c_int; - j += 1; - } - *out.offset(i as isize) = (s + 8 >> 4) as pixel; - i += 1; - } - while i < sz { - *out.offset(i as isize) = *in_0.offset(iclip(i, from, to - 1) as isize); - i += 1; - } -} - #[inline(never)] unsafe fn upsample_edge( out: *mut pixel, @@ -294,7 +258,7 @@ unsafe fn ipred_z1_rust( 0 as c_int }; if 
filter_strength != 0 { - filter_edge( + filter_edge::( top_out.as_mut_ptr(), width + height, 0 as c_int, @@ -412,7 +376,7 @@ unsafe fn ipred_z2_rust( 0 as c_int }; if filter_strength != 0 { - filter_edge( + filter_edge::( &mut *topleft.offset(1), width, 0 as c_int, @@ -447,7 +411,7 @@ unsafe fn ipred_z2_rust( 0 as c_int }; if filter_strength_0 != 0 { - filter_edge( + filter_edge::( &mut *topleft.offset(-height as isize), height, height - max_height, @@ -571,7 +535,7 @@ unsafe fn ipred_z3_rust( 0 as c_int }; if filter_strength != 0 { - filter_edge( + filter_edge::( left_out.as_mut_ptr(), width + height, 0 as c_int, diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index aef94a7ad..173f1d0b0 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -5,6 +5,7 @@ use crate::include::common::bitdepth::DynPixel; use crate::include::common::intops::iclip; use crate::include::common::intops::iclip_u8; use crate::include::dav1d::headers::Rav1dPixelLayout; +use crate::src::ipred::filter_edge; use crate::src::ipred::get_filter_strength; use crate::src::ipred::get_upsample; use crate::src::ipred::ipred_cfl_128_c_erased; @@ -140,43 +141,6 @@ extern "C" { pub type pixel = u8; -#[inline(never)] -unsafe fn filter_edge( - out: *mut pixel, - sz: c_int, - lim_from: c_int, - lim_to: c_int, - in_0: *const pixel, - from: c_int, - to: c_int, - strength: c_int, -) { - static kernel: [[u8; 5]; 3] = [[0, 4, 8, 4, 0], [0, 5, 6, 5, 0], [2, 4, 4, 4, 2]]; - if !(strength > 0) { - unreachable!(); - } - let mut i = 0; - while i < cmp::min(sz, lim_from) { - *out.offset(i as isize) = *in_0.offset(iclip(i, from, to - 1) as isize); - i += 1; - } - while i < cmp::min(lim_to, sz) { - let mut s = 0; - let mut j = 0; - while j < 5 { - s += *in_0.offset(iclip(i - 2 + j, from, to - 1) as isize) as c_int - * kernel[(strength - 1) as usize][j as usize] as c_int; - j += 1; - } - *out.offset(i as isize) = (s + 8 >> 4) as pixel; - i += 1; - } - while i < sz { - *out.offset(i as isize) = 
*in_0.offset(iclip(i, from, to - 1) as isize); - i += 1; - } -} - #[inline(never)] unsafe fn upsample_edge(out: *mut pixel, hsz: c_int, in_0: *const pixel, from: c_int, to: c_int) { static kernel: [i8; 4] = [-1, 9, 9, -1]; @@ -263,7 +227,7 @@ unsafe fn ipred_z1_rust( 0 as c_int }; if filter_strength != 0 { - filter_edge( + filter_edge::( top_out.as_mut_ptr(), width + height, 0 as c_int, @@ -372,7 +336,7 @@ unsafe fn ipred_z2_rust( 0 as c_int }; if filter_strength != 0 { - filter_edge( + filter_edge::( &mut *topleft.offset(1), width, 0 as c_int, @@ -406,7 +370,7 @@ unsafe fn ipred_z2_rust( 0 as c_int }; if filter_strength_0 != 0 { - filter_edge( + filter_edge::( &mut *topleft.offset(-height as isize), height, height - max_height, @@ -527,7 +491,7 @@ unsafe fn ipred_z3_rust( 0 as c_int }; if filter_strength != 0 { - filter_edge( + filter_edge::( left_out.as_mut_ptr(), width + height, 0 as c_int, From d17fc43321bd45cb21b9ad1c41961105aa5a56a6 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Mon, 6 Nov 2023 22:25:48 -0800 Subject: [PATCH 27/49] `fn upsample_edge`: Deduplicate w/ generics. 
--- src/ipred.rs | 29 +++++++++++++++++++++++++++++ src/ipred_tmpl_16.rs | 44 +++++++++----------------------------------- src/ipred_tmpl_8.rs | 40 +++++++++++++++------------------------- 3 files changed, 53 insertions(+), 60 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index ca7ad2b11..54d8e19e2 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -899,6 +899,35 @@ pub(crate) unsafe fn get_upsample(wh: c_int, angle: c_int, is_sm: c_int) -> c_in return (angle < 40 && wh <= 16 >> is_sm) as c_int; } +// TODO(kkysen) Temporarily pub until mod is deduplicated +#[inline(never)] +pub(crate) unsafe fn upsample_edge( + out: *mut BD::Pixel, + hsz: c_int, + in_0: *const BD::Pixel, + from: c_int, + to: c_int, + bd: BD, +) { + static kernel: [i8; 4] = [-1, 9, 9, -1]; + let mut i; + i = 0 as c_int; + while i < hsz - 1 { + *out.offset((i * 2) as isize) = *in_0.offset(iclip(i, from, to - 1) as isize); + let mut s = 0; + let mut j = 0; + while j < 4 { + s += (*in_0.offset(iclip(i + j - 1, from, to - 1) as isize)).as_::() + * kernel[j as usize] as c_int; + j += 1; + } + *out.offset((i * 2 + 1) as isize) = + iclip(s + 8 >> 4, 0 as c_int, bd.bitdepth_max().as_::()).as_::(); + i += 1; + } + *out.offset((i * 2) as isize) = *in_0.offset(iclip(i, from, to - 1) as isize); +} + // TODO(kkysen) Temporarily pub until mod is deduplicated pub(crate) unsafe fn filter_fn( flt_ptr: *const i8, diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 624a09e35..cf2d97cac 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -21,6 +21,7 @@ use crate::src::ipred::ipred_smooth_c_erased; use crate::src::ipred::ipred_smooth_h_c_erased; use crate::src::ipred::ipred_smooth_v_c_erased; use crate::src::ipred::ipred_v_c_erased; +use crate::src::ipred::upsample_edge; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; use crate::src::levels::DC_PRED; @@ -162,33 +163,6 @@ unsafe fn pixel_set(dst: *mut pixel, val: c_int, num: c_int) { } } -#[inline(never)] 
-unsafe fn upsample_edge( - out: *mut pixel, - hsz: c_int, - in_0: *const pixel, - from: c_int, - to: c_int, - bitdepth_max: c_int, -) { - static kernel: [i8; 4] = [-1, 9, 9, -1]; - let mut i; - i = 0 as c_int; - while i < hsz - 1 { - *out.offset((i * 2) as isize) = *in_0.offset(iclip(i, from, to - 1) as isize); - let mut s = 0; - let mut j = 0; - while j < 4 { - s += *in_0.offset(iclip(i + j - 1, from, to - 1) as isize) as c_int - * kernel[j as usize] as c_int; - j += 1; - } - *out.offset((i * 2 + 1) as isize) = iclip(s + 8 >> 4, 0 as c_int, bitdepth_max) as pixel; - i += 1; - } - *out.offset((i * 2) as isize) = *in_0.offset(iclip(i, from, to - 1) as isize); -} - unsafe extern "C" fn ipred_z1_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -240,13 +214,13 @@ unsafe fn ipred_z1_rust( 0 as c_int }; if upsample_above != 0 { - upsample_edge( + upsample_edge::( top_out.as_mut_ptr(), width + height, &*topleft_in.offset(1), -(1 as c_int), width + cmp::min(width, height), - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); top = top_out.as_mut_ptr(); max_base_x = 2 * (width + height) - 2; @@ -360,13 +334,13 @@ unsafe fn ipred_z2_rust( let mut edge: [pixel; 129] = [0; 129]; let topleft: *mut pixel = &mut *edge.as_mut_ptr().offset(64) as *mut pixel; if upsample_above != 0 { - upsample_edge( + upsample_edge::( topleft, width + 1, topleft_in, 0 as c_int, width + 1, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); dx <<= 1; } else { @@ -395,13 +369,13 @@ unsafe fn ipred_z2_rust( } } if upsample_left != 0 { - upsample_edge( + upsample_edge::( &mut *topleft.offset((-height * 2) as isize), height + 1, &*topleft_in.offset(-height as isize), 0 as c_int, height + 1, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); dy <<= 1; } else { @@ -515,13 +489,13 @@ unsafe fn ipred_z3_rust( 0 as c_int }; if upsample_left != 0 { - upsample_edge( + upsample_edge::( left_out.as_mut_ptr(), width + height, &*topleft_in.offset(-(width + height) as isize), cmp::max(width - height, 0 
as c_int), width + height + 1, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); left = &mut *left_out .as_mut_ptr() diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 173f1d0b0..791f32041 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -2,7 +2,6 @@ use crate::include::common::attributes::ctz; use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::BitDepth8; use crate::include::common::bitdepth::DynPixel; -use crate::include::common::intops::iclip; use crate::include::common::intops::iclip_u8; use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::src::ipred::filter_edge; @@ -22,6 +21,7 @@ use crate::src::ipred::ipred_smooth_c_erased; use crate::src::ipred::ipred_smooth_h_c_erased; use crate::src::ipred::ipred_smooth_v_c_erased; use crate::src::ipred::ipred_v_c_erased; +use crate::src::ipred::upsample_edge; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; use crate::src::levels::DC_PRED; @@ -141,26 +141,6 @@ extern "C" { pub type pixel = u8; -#[inline(never)] -unsafe fn upsample_edge(out: *mut pixel, hsz: c_int, in_0: *const pixel, from: c_int, to: c_int) { - static kernel: [i8; 4] = [-1, 9, 9, -1]; - let mut i; - i = 0 as c_int; - while i < hsz - 1 { - *out.offset((i * 2) as isize) = *in_0.offset(iclip(i, from, to - 1) as isize); - let mut s = 0; - let mut j = 0; - while j < 4 { - s += *in_0.offset(iclip(i + j - 1, from, to - 1) as isize) as c_int - * kernel[j as usize] as c_int; - j += 1; - } - *out.offset((i * 2 + 1) as isize) = iclip_u8(s + 8 >> 4) as pixel; - i += 1; - } - *out.offset((i * 2) as isize) = *in_0.offset(iclip(i, from, to - 1) as isize); -} - unsafe extern "C" fn ipred_z1_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -210,12 +190,13 @@ unsafe fn ipred_z1_rust( 0 as c_int }; if upsample_above != 0 { - upsample_edge( + upsample_edge::( top_out.as_mut_ptr(), width + height, &*topleft_in.offset(1), -(1 as c_int), width + cmp::min(width, 
height), + BitDepth8::new(()), ); top = top_out.as_mut_ptr(); max_base_x = 2 * (width + height) - 2; @@ -327,7 +308,14 @@ unsafe fn ipred_z2_rust( let mut edge: [pixel; 129] = [0; 129]; let topleft: *mut pixel = &mut *edge.as_mut_ptr().offset(64) as *mut pixel; if upsample_above != 0 { - upsample_edge(topleft, width + 1, topleft_in, 0 as c_int, width + 1); + upsample_edge::( + topleft, + width + 1, + topleft_in, + 0 as c_int, + width + 1, + BitDepth8::new(()), + ); dx <<= 1; } else { let filter_strength = if enable_intra_edge_filter != 0 { @@ -355,12 +343,13 @@ unsafe fn ipred_z2_rust( } } if upsample_left != 0 { - upsample_edge( + upsample_edge::( &mut *topleft.offset((-height * 2) as isize), height + 1, &*topleft_in.offset(-height as isize), 0 as c_int, height + 1, + BitDepth8::new(()), ); dy <<= 1; } else { @@ -472,12 +461,13 @@ unsafe fn ipred_z3_rust( 0 as c_int }; if upsample_left != 0 { - upsample_edge( + upsample_edge::( left_out.as_mut_ptr(), width + height, &*topleft_in.offset(-(width + height) as isize), cmp::max(width - height, 0 as c_int), width + height + 1, + BitDepth8::new(()), ); left = &mut *left_out .as_mut_ptr() From 0227d72c2d76df3b0da039cc1d435217aec754c0 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Mon, 6 Nov 2023 22:34:19 -0800 Subject: [PATCH 28/49] `fn ipred_z1_rust`: Deduplicate w/ generics. 
--- src/ipred.rs | 95 +++++++++++++++++++++++++++++++++++++++ src/ipred_tmpl_16.rs | 105 ++----------------------------------------- src/ipred_tmpl_8.rs | 95 ++------------------------------------- 3 files changed, 101 insertions(+), 194 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 54d8e19e2..aa547be89 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -5,6 +5,7 @@ use crate::include::common::bitdepth::DynPixel; use crate::include::common::bitdepth::BPC; use crate::include::common::intops::apply_sign; use crate::include::common::intops::iclip; +use crate::src::tables::dav1d_dr_intra_derivative; use crate::src::tables::dav1d_sm_weights; use cfg_if::cfg_if; use libc::ptrdiff_t; @@ -928,6 +929,100 @@ pub(crate) unsafe fn upsample_edge( *out.offset((i * 2) as isize) = *in_0.offset(iclip(i, from, to - 1) as isize); } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe fn ipred_z1_rust( + mut dst: *mut BD::Pixel, + stride: ptrdiff_t, + topleft_in: *const BD::Pixel, + width: c_int, + height: c_int, + mut angle: c_int, + _max_width: c_int, + _max_height: c_int, + bd: BD, +) { + let is_sm = angle >> 9 & 0x1 as c_int; + let enable_intra_edge_filter = angle >> 10; + angle &= 511 as c_int; + if !(angle < 90) { + unreachable!(); + } + let mut dx = dav1d_dr_intra_derivative[(angle >> 1) as usize] as c_int; + let mut top_out: [BD::Pixel; 128] = [0.into(); 128]; + let top: *const BD::Pixel; + let max_base_x; + let upsample_above = if enable_intra_edge_filter != 0 { + get_upsample(width + height, 90 - angle, is_sm) + } else { + 0 as c_int + }; + if upsample_above != 0 { + upsample_edge::( + top_out.as_mut_ptr(), + width + height, + &*topleft_in.offset(1), + -(1 as c_int), + width + cmp::min(width, height), + bd, + ); + top = top_out.as_mut_ptr(); + max_base_x = 2 * (width + height) - 2; + dx <<= 1; + } else { + let filter_strength = if enable_intra_edge_filter != 0 { + get_filter_strength(width + height, 90 - angle, is_sm) + } else { + 0 as 
c_int + }; + if filter_strength != 0 { + filter_edge::( + top_out.as_mut_ptr(), + width + height, + 0 as c_int, + width + height, + &*topleft_in.offset(1), + -(1 as c_int), + width + cmp::min(width, height), + filter_strength, + ); + top = top_out.as_mut_ptr(); + max_base_x = width + height - 1; + } else { + top = &*topleft_in.offset(1) as *const BD::Pixel; + max_base_x = width + cmp::min(width, height) - 1; + } + } + let base_inc = 1 + upsample_above; + let mut y = 0; + let mut xpos = dx; + while y < height { + let frac = xpos & 0x3e as c_int; + let mut x = 0; + let mut base = xpos >> 6; + while x < width { + if base < max_base_x { + let v = (*top.offset(base as isize)).as_::() * (64 - frac) + + (*top.offset((base + 1) as isize)).as_::() * frac; + *dst.offset(x as isize) = (v + 32 >> 6).as_::(); + x += 1; + base += base_inc; + } else { + let width = width.try_into().unwrap(); + let x = x as usize; + BD::pixel_set( + &mut slice::from_raw_parts_mut(dst, width)[x..], + *top.offset(max_base_x as isize), + width - x, + ); + break; + } + } + y += 1; + dst = dst.offset(BD::pxstride(stride as usize) as isize); + xpos += dx; + } +} + // TODO(kkysen) Temporarily pub until mod is deduplicated pub(crate) unsafe fn filter_fn( flt_ptr: *const i8, diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index cf2d97cac..4914bf0e5 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -21,6 +21,7 @@ use crate::src::ipred::ipred_smooth_c_erased; use crate::src::ipred::ipred_smooth_h_c_erased; use crate::src::ipred::ipred_smooth_v_c_erased; use crate::src::ipred::ipred_v_c_erased; +use crate::src::ipred::ipred_z1_rust; use crate::src::ipred::upsample_edge; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; @@ -154,15 +155,6 @@ unsafe fn PXSTRIDE(x: ptrdiff_t) -> ptrdiff_t { return x >> 1; } -#[inline] -unsafe fn pixel_set(dst: *mut pixel, val: c_int, num: c_int) { - let mut n = 0; - while n < num { - *dst.offset(n as isize) = val as 
pixel; - n += 1; - } -} - unsafe extern "C" fn ipred_z1_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -174,7 +166,7 @@ unsafe extern "C" fn ipred_z1_c_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_z1_rust( + ipred_z1_rust::( dst.cast(), stride, topleft_in.cast(), @@ -183,101 +175,10 @@ unsafe extern "C" fn ipred_z1_c_erased( angle, max_width, max_height, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); } -unsafe fn ipred_z1_rust( - mut dst: *mut pixel, - stride: ptrdiff_t, - topleft_in: *const pixel, - width: c_int, - height: c_int, - mut angle: c_int, - _max_width: c_int, - _max_height: c_int, - bitdepth_max: c_int, -) { - let is_sm = angle >> 9 & 0x1 as c_int; - let enable_intra_edge_filter = angle >> 10; - angle &= 511 as c_int; - if !(angle < 90) { - unreachable!(); - } - let mut dx = dav1d_dr_intra_derivative[(angle >> 1) as usize] as c_int; - let mut top_out: [pixel; 128] = [0; 128]; - let top: *const pixel; - let max_base_x; - let upsample_above = if enable_intra_edge_filter != 0 { - get_upsample(width + height, 90 - angle, is_sm) - } else { - 0 as c_int - }; - if upsample_above != 0 { - upsample_edge::( - top_out.as_mut_ptr(), - width + height, - &*topleft_in.offset(1), - -(1 as c_int), - width + cmp::min(width, height), - BitDepth16::from_c(bitdepth_max), - ); - top = top_out.as_mut_ptr(); - max_base_x = 2 * (width + height) - 2; - dx <<= 1; - } else { - let filter_strength = if enable_intra_edge_filter != 0 { - get_filter_strength(width + height, 90 - angle, is_sm) - } else { - 0 as c_int - }; - if filter_strength != 0 { - filter_edge::( - top_out.as_mut_ptr(), - width + height, - 0 as c_int, - width + height, - &*topleft_in.offset(1), - -(1 as c_int), - width + cmp::min(width, height), - filter_strength, - ); - top = top_out.as_mut_ptr(); - max_base_x = width + height - 1; - } else { - top = &*topleft_in.offset(1) as *const pixel; - max_base_x = width + cmp::min(width, height) - 1; - } - } - let base_inc = 1 + upsample_above; - 
let mut y = 0; - let mut xpos = dx; - while y < height { - let frac = xpos & 0x3e as c_int; - let mut x = 0; - let mut base = xpos >> 6; - while x < width { - if base < max_base_x { - let v = *top.offset(base as isize) as c_int * (64 - frac) - + *top.offset((base + 1) as isize) as c_int * frac; - *dst.offset(x as isize) = (v + 32 >> 6) as pixel; - x += 1; - base += base_inc; - } else { - pixel_set( - &mut *dst.offset(x as isize), - *top.offset(max_base_x as isize) as c_int, - width - x, - ); - break; - } - } - y += 1; - dst = dst.offset(PXSTRIDE(stride) as isize); - xpos += dx; - } -} - unsafe extern "C" fn ipred_z2_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 791f32041..bf7559669 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -21,6 +21,7 @@ use crate::src::ipred::ipred_smooth_c_erased; use crate::src::ipred::ipred_smooth_h_c_erased; use crate::src::ipred::ipred_smooth_v_c_erased; use crate::src::ipred::ipred_v_c_erased; +use crate::src::ipred::ipred_z1_rust; use crate::src::ipred::upsample_edge; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; @@ -40,7 +41,6 @@ use crate::src::levels::Z3_PRED; use crate::src::tables::dav1d_dr_intra_derivative; use crate::src::tables::dav1d_filter_intra_taps; use libc::memcpy; -use libc::memset; use libc::ptrdiff_t; use std::cmp; use std::ffi::c_int; @@ -152,7 +152,7 @@ unsafe extern "C" fn ipred_z1_c_erased( max_height: c_int, _bitdepth_max: c_int, ) { - ipred_z1_rust( + ipred_z1_rust::( dst.cast(), stride, topleft_in.cast(), @@ -161,99 +161,10 @@ unsafe extern "C" fn ipred_z1_c_erased( angle, max_width, max_height, + BitDepth8::new(()), ); } -unsafe fn ipred_z1_rust( - mut dst: *mut pixel, - stride: ptrdiff_t, - topleft_in: *const pixel, - width: c_int, - height: c_int, - mut angle: c_int, - _max_width: c_int, - _max_height: c_int, -) { - let is_sm = angle >> 9 & 0x1 as c_int; - let enable_intra_edge_filter = angle 
>> 10; - angle &= 511 as c_int; - if !(angle < 90) { - unreachable!(); - } - let mut dx = dav1d_dr_intra_derivative[(angle >> 1) as usize] as c_int; - let mut top_out: [pixel; 128] = [0; 128]; - let top: *const pixel; - let max_base_x; - let upsample_above = if enable_intra_edge_filter != 0 { - get_upsample(width + height, 90 - angle, is_sm) - } else { - 0 as c_int - }; - if upsample_above != 0 { - upsample_edge::( - top_out.as_mut_ptr(), - width + height, - &*topleft_in.offset(1), - -(1 as c_int), - width + cmp::min(width, height), - BitDepth8::new(()), - ); - top = top_out.as_mut_ptr(); - max_base_x = 2 * (width + height) - 2; - dx <<= 1; - } else { - let filter_strength = if enable_intra_edge_filter != 0 { - get_filter_strength(width + height, 90 - angle, is_sm) - } else { - 0 as c_int - }; - if filter_strength != 0 { - filter_edge::( - top_out.as_mut_ptr(), - width + height, - 0 as c_int, - width + height, - &*topleft_in.offset(1), - -(1 as c_int), - width + cmp::min(width, height), - filter_strength, - ); - top = top_out.as_mut_ptr(); - max_base_x = width + height - 1; - } else { - top = &*topleft_in.offset(1) as *const pixel; - max_base_x = width + cmp::min(width, height) - 1; - } - } - let base_inc = 1 + upsample_above; - let mut y = 0; - let mut xpos = dx; - while y < height { - let frac = xpos & 0x3e as c_int; - let mut x = 0; - let mut base = xpos >> 6; - while x < width { - if base < max_base_x { - let v = *top.offset(base as isize) as c_int * (64 - frac) - + *top.offset((base + 1) as isize) as c_int * frac; - *dst.offset(x as isize) = (v + 32 >> 6) as pixel; - x += 1; - base += base_inc; - } else { - memset( - &mut *dst.offset(x as isize) as *mut pixel as *mut c_void, - *top.offset(max_base_x as isize) as c_int, - (width - x) as usize, - ); - break; - } - } - y += 1; - dst = dst.offset(stride as isize); - xpos += dx; - } -} - unsafe extern "C" fn ipred_z2_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, From fe0a45204137e14f7bf4623ba35499e811a3c215 Mon 
Sep 17 00:00:00 2001 From: Khyber Sen Date: Mon, 6 Nov 2023 22:42:34 -0800 Subject: [PATCH 29/49] `fn ipred_z2_rust`: Deduplicate w/ generics. --- src/ipred.rs | 138 ++++++++++++++++++++++++++++++++++++++++++ src/ipred_tmpl_16.rs | 141 +------------------------------------------ src/ipred_tmpl_8.rs | 139 +----------------------------------------- 3 files changed, 144 insertions(+), 274 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index aa547be89..284e1bad5 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -1023,6 +1023,144 @@ pub(crate) unsafe fn ipred_z1_rust( } } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe fn ipred_z2_rust( + mut dst: *mut BD::Pixel, + stride: ptrdiff_t, + topleft_in: *const BD::Pixel, + width: c_int, + height: c_int, + mut angle: c_int, + max_width: c_int, + max_height: c_int, + bd: BD, +) { + let is_sm = angle >> 9 & 0x1 as c_int; + let enable_intra_edge_filter = angle >> 10; + angle &= 511 as c_int; + if !(angle > 90 && angle < 180) { + unreachable!(); + } + let mut dy = dav1d_dr_intra_derivative[(angle - 90 >> 1) as usize] as c_int; + let mut dx = dav1d_dr_intra_derivative[(180 - angle >> 1) as usize] as c_int; + let upsample_left = if enable_intra_edge_filter != 0 { + get_upsample(width + height, 180 - angle, is_sm) + } else { + 0 as c_int + }; + let upsample_above = if enable_intra_edge_filter != 0 { + get_upsample(width + height, angle - 90, is_sm) + } else { + 0 as c_int + }; + let mut edge: [BD::Pixel; 129] = [0.into(); 129]; + let topleft: *mut BD::Pixel = &mut *edge.as_mut_ptr().offset(64) as *mut BD::Pixel; + if upsample_above != 0 { + upsample_edge::(topleft, width + 1, topleft_in, 0 as c_int, width + 1, bd); + dx <<= 1; + } else { + let filter_strength = if enable_intra_edge_filter != 0 { + get_filter_strength(width + height, angle - 90, is_sm) + } else { + 0 as c_int + }; + if filter_strength != 0 { + filter_edge::( + &mut *topleft.offset(1), + width, + 0 as c_int, + max_width, + 
&*topleft_in.offset(1), + -(1 as c_int), + width, + filter_strength, + ); + } else { + let width = width.try_into().unwrap(); + BD::pixel_copy( + &mut slice::from_raw_parts_mut(topleft, width + 1)[1..], + &slice::from_raw_parts(topleft_in, width + 1)[1..], + width, + ); + } + } + if upsample_left != 0 { + upsample_edge::( + &mut *topleft.offset((-height * 2) as isize), + height + 1, + &*topleft_in.offset(-height as isize), + 0 as c_int, + height + 1, + bd, + ); + dy <<= 1; + } else { + let filter_strength_0 = if enable_intra_edge_filter != 0 { + get_filter_strength(width + height, 180 - angle, is_sm) + } else { + 0 as c_int + }; + if filter_strength_0 != 0 { + filter_edge::( + &mut *topleft.offset(-height as isize), + height, + height - max_height, + height, + &*topleft_in.offset(-height as isize), + 0 as c_int, + height + 1, + filter_strength_0, + ); + } else { + BD::pixel_copy( + slice::from_raw_parts_mut( + topleft.offset(-height as isize), + height.try_into().unwrap(), + ), + slice::from_raw_parts( + topleft_in.offset(-height as isize), + height.try_into().unwrap(), + ), + height.try_into().unwrap(), + ); + } + } + *topleft = *topleft_in; + let base_inc_x = 1 + upsample_above; + let left: *const BD::Pixel = + &mut *topleft.offset(-(1 + upsample_left) as isize) as *mut BD::Pixel; + let mut y = 0; + let mut xpos = (1 + upsample_above << 6) - dx; + while y < height { + let mut base_x = xpos >> 6; + let frac_x = xpos & 0x3e as c_int; + let mut x = 0; + let mut ypos = (y << 6 + upsample_left) - dy; + while x < width { + let v; + if base_x >= 0 { + v = (*topleft.offset(base_x as isize)).as_::() * (64 - frac_x) + + (*topleft.offset((base_x + 1) as isize)).as_::() * frac_x; + } else { + let base_y = ypos >> 6; + if !(base_y >= -(1 + upsample_left)) { + unreachable!(); + } + let frac_y = ypos & 0x3e as c_int; + v = (*left.offset(-base_y as isize)).as_::() * (64 - frac_y) + + (*left.offset(-(base_y + 1) as isize)).as_::() * frac_y; + } + *dst.offset(x as isize) = (v + 32 
>> 6).as_::(); + x += 1; + base_x += base_inc_x; + ypos -= dy; + } + y += 1; + xpos -= dx; + dst = dst.offset(BD::pxstride(stride as usize) as isize); + } +} + // TODO(kkysen) Temporarily pub until mod is deduplicated pub(crate) unsafe fn filter_fn( flt_ptr: *const i8, diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 4914bf0e5..dd79444e2 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -22,6 +22,7 @@ use crate::src::ipred::ipred_smooth_h_c_erased; use crate::src::ipred::ipred_smooth_v_c_erased; use crate::src::ipred::ipred_v_c_erased; use crate::src::ipred::ipred_z1_rust; +use crate::src::ipred::ipred_z2_rust; use crate::src::ipred::upsample_edge; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; @@ -190,7 +191,7 @@ unsafe extern "C" fn ipred_z2_c_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_z2_rust( + ipred_z2_rust::( dst.cast(), stride, topleft_in.cast(), @@ -199,146 +200,10 @@ unsafe extern "C" fn ipred_z2_c_erased( angle, max_width, max_height, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); } -unsafe fn ipred_z2_rust( - mut dst: *mut pixel, - stride: ptrdiff_t, - topleft_in: *const pixel, - width: c_int, - height: c_int, - mut angle: c_int, - max_width: c_int, - max_height: c_int, - bitdepth_max: c_int, -) { - let is_sm = angle >> 9 & 0x1 as c_int; - let enable_intra_edge_filter = angle >> 10; - angle &= 511 as c_int; - if !(angle > 90 && angle < 180) { - unreachable!(); - } - let mut dy = dav1d_dr_intra_derivative[(angle - 90 >> 1) as usize] as c_int; - let mut dx = dav1d_dr_intra_derivative[(180 - angle >> 1) as usize] as c_int; - let upsample_left = if enable_intra_edge_filter != 0 { - get_upsample(width + height, 180 - angle, is_sm) - } else { - 0 as c_int - }; - let upsample_above = if enable_intra_edge_filter != 0 { - get_upsample(width + height, angle - 90, is_sm) - } else { - 0 as c_int - }; - let mut edge: [pixel; 129] = [0; 129]; - let topleft: *mut pixel = &mut 
*edge.as_mut_ptr().offset(64) as *mut pixel; - if upsample_above != 0 { - upsample_edge::( - topleft, - width + 1, - topleft_in, - 0 as c_int, - width + 1, - BitDepth16::from_c(bitdepth_max), - ); - dx <<= 1; - } else { - let filter_strength = if enable_intra_edge_filter != 0 { - get_filter_strength(width + height, angle - 90, is_sm) - } else { - 0 as c_int - }; - if filter_strength != 0 { - filter_edge::( - &mut *topleft.offset(1), - width, - 0 as c_int, - max_width, - &*topleft_in.offset(1), - -(1 as c_int), - width, - filter_strength, - ); - } else { - memcpy( - &mut *topleft.offset(1) as *mut pixel as *mut c_void, - &*topleft_in.offset(1) as *const pixel as *const c_void, - (width << 1) as usize, - ); - } - } - if upsample_left != 0 { - upsample_edge::( - &mut *topleft.offset((-height * 2) as isize), - height + 1, - &*topleft_in.offset(-height as isize), - 0 as c_int, - height + 1, - BitDepth16::from_c(bitdepth_max), - ); - dy <<= 1; - } else { - let filter_strength_0 = if enable_intra_edge_filter != 0 { - get_filter_strength(width + height, 180 - angle, is_sm) - } else { - 0 as c_int - }; - if filter_strength_0 != 0 { - filter_edge::( - &mut *topleft.offset(-height as isize), - height, - height - max_height, - height, - &*topleft_in.offset(-height as isize), - 0 as c_int, - height + 1, - filter_strength_0, - ); - } else { - memcpy( - &mut *topleft.offset(-height as isize) as *mut pixel as *mut c_void, - &*topleft_in.offset(-height as isize) as *const pixel as *const c_void, - (height << 1) as usize, - ); - } - } - *topleft = *topleft_in; - let base_inc_x = 1 + upsample_above; - let left: *const pixel = &mut *topleft.offset(-(1 + upsample_left) as isize) as *mut pixel; - let mut y = 0; - let mut xpos = (1 + upsample_above << 6) - dx; - while y < height { - let mut base_x = xpos >> 6; - let frac_x = xpos & 0x3e as c_int; - let mut x = 0; - let mut ypos = (y << 6 + upsample_left) - dy; - while x < width { - let v; - if base_x >= 0 { - v = *topleft.offset(base_x 
as isize) as c_int * (64 - frac_x) - + *topleft.offset((base_x + 1) as isize) as c_int * frac_x; - } else { - let base_y = ypos >> 6; - if !(base_y >= -(1 + upsample_left)) { - unreachable!(); - } - let frac_y = ypos & 0x3e as c_int; - v = *left.offset(-base_y as isize) as c_int * (64 - frac_y) - + *left.offset(-(base_y + 1) as isize) as c_int * frac_y; - } - *dst.offset(x as isize) = (v + 32 >> 6) as pixel; - x += 1; - base_x += base_inc_x; - ypos -= dy; - } - y += 1; - xpos -= dx; - dst = dst.offset(PXSTRIDE(stride) as isize); - } -} - unsafe extern "C" fn ipred_z3_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index bf7559669..43232001c 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -22,6 +22,7 @@ use crate::src::ipred::ipred_smooth_h_c_erased; use crate::src::ipred::ipred_smooth_v_c_erased; use crate::src::ipred::ipred_v_c_erased; use crate::src::ipred::ipred_z1_rust; +use crate::src::ipred::ipred_z2_rust; use crate::src::ipred::upsample_edge; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; @@ -176,7 +177,7 @@ unsafe extern "C" fn ipred_z2_c_erased( max_height: c_int, _bitdepth_max: c_int, ) { - ipred_z2_rust( + ipred_z2_rust::( dst.cast(), stride, topleft_in.cast(), @@ -185,144 +186,10 @@ unsafe extern "C" fn ipred_z2_c_erased( angle, max_width, max_height, + BitDepth8::new(()), ); } -unsafe fn ipred_z2_rust( - mut dst: *mut pixel, - stride: ptrdiff_t, - topleft_in: *const pixel, - width: c_int, - height: c_int, - mut angle: c_int, - max_width: c_int, - max_height: c_int, -) { - let is_sm = angle >> 9 & 0x1 as c_int; - let enable_intra_edge_filter = angle >> 10; - angle &= 511 as c_int; - if !(angle > 90 && angle < 180) { - unreachable!(); - } - let mut dy = dav1d_dr_intra_derivative[(angle - 90 >> 1) as usize] as c_int; - let mut dx = dav1d_dr_intra_derivative[(180 - angle >> 1) as usize] as c_int; - let upsample_left = if enable_intra_edge_filter != 
0 { - get_upsample(width + height, 180 - angle, is_sm) - } else { - 0 as c_int - }; - let upsample_above = if enable_intra_edge_filter != 0 { - get_upsample(width + height, angle - 90, is_sm) - } else { - 0 as c_int - }; - let mut edge: [pixel; 129] = [0; 129]; - let topleft: *mut pixel = &mut *edge.as_mut_ptr().offset(64) as *mut pixel; - if upsample_above != 0 { - upsample_edge::( - topleft, - width + 1, - topleft_in, - 0 as c_int, - width + 1, - BitDepth8::new(()), - ); - dx <<= 1; - } else { - let filter_strength = if enable_intra_edge_filter != 0 { - get_filter_strength(width + height, angle - 90, is_sm) - } else { - 0 as c_int - }; - if filter_strength != 0 { - filter_edge::( - &mut *topleft.offset(1), - width, - 0 as c_int, - max_width, - &*topleft_in.offset(1), - -(1 as c_int), - width, - filter_strength, - ); - } else { - memcpy( - &mut *topleft.offset(1) as *mut pixel as *mut c_void, - &*topleft_in.offset(1) as *const pixel as *const c_void, - width as usize, - ); - } - } - if upsample_left != 0 { - upsample_edge::( - &mut *topleft.offset((-height * 2) as isize), - height + 1, - &*topleft_in.offset(-height as isize), - 0 as c_int, - height + 1, - BitDepth8::new(()), - ); - dy <<= 1; - } else { - let filter_strength_0 = if enable_intra_edge_filter != 0 { - get_filter_strength(width + height, 180 - angle, is_sm) - } else { - 0 as c_int - }; - if filter_strength_0 != 0 { - filter_edge::( - &mut *topleft.offset(-height as isize), - height, - height - max_height, - height, - &*topleft_in.offset(-height as isize), - 0 as c_int, - height + 1, - filter_strength_0, - ); - } else { - memcpy( - &mut *topleft.offset(-height as isize) as *mut pixel as *mut c_void, - &*topleft_in.offset(-height as isize) as *const pixel as *const c_void, - height as usize, - ); - } - } - *topleft = *topleft_in; - let base_inc_x = 1 + upsample_above; - let left: *const pixel = &mut *topleft.offset(-(1 + upsample_left) as isize) as *mut pixel; - let mut y = 0; - let mut xpos = (1 + 
upsample_above << 6) - dx; - while y < height { - let mut base_x = xpos >> 6; - let frac_x = xpos & 0x3e as c_int; - let mut x = 0; - let mut ypos = (y << 6 + upsample_left) - dy; - while x < width { - let v; - if base_x >= 0 { - v = *topleft.offset(base_x as isize) as c_int * (64 - frac_x) - + *topleft.offset((base_x + 1) as isize) as c_int * frac_x; - } else { - let base_y = ypos >> 6; - if !(base_y >= -(1 + upsample_left)) { - unreachable!(); - } - let frac_y = ypos & 0x3e as c_int; - v = *left.offset(-base_y as isize) as c_int * (64 - frac_y) - + *left.offset(-(base_y + 1) as isize) as c_int * frac_y; - } - *dst.offset(x as isize) = (v + 32 >> 6) as pixel; - x += 1; - base_x += base_inc_x; - ypos -= dy; - } - y += 1; - xpos -= dx; - dst = dst.offset(stride as isize); - } -} - unsafe extern "C" fn ipred_z3_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, From 5ffa91ebfa1b88e6fa3f99ccfdc11f142c07ee10 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Mon, 6 Nov 2023 22:52:38 -0800 Subject: [PATCH 30/49] `fn ipred_z3_rust`: Deduplicate w/ generics. 
--- src/ipred.rs | 106 +++++++++++++++++++++++++++++++++++++-- src/ipred_tmpl_16.rs | 116 +++++-------------------------------------- src/ipred_tmpl_8.rs | 113 +++++------------------------------------ 3 files changed, 126 insertions(+), 209 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 284e1bad5..8eb89836f 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -856,9 +856,8 @@ pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) return 0 as c_int; } -// TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] -pub(crate) unsafe fn filter_edge( +unsafe fn filter_edge( out: *mut BD::Pixel, sz: c_int, lim_from: c_int, @@ -900,9 +899,8 @@ pub(crate) unsafe fn get_upsample(wh: c_int, angle: c_int, is_sm: c_int) -> c_in return (angle < 40 && wh <= 16 >> is_sm) as c_int; } -// TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] -pub(crate) unsafe fn upsample_edge( +unsafe fn upsample_edge( out: *mut BD::Pixel, hsz: c_int, in_0: *const BD::Pixel, @@ -1161,6 +1159,106 @@ pub(crate) unsafe fn ipred_z2_rust( } } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe fn ipred_z3_rust( + dst: *mut BD::Pixel, + stride: ptrdiff_t, + topleft_in: *const BD::Pixel, + width: c_int, + height: c_int, + mut angle: c_int, + _max_width: c_int, + _max_height: c_int, + bd: BD, +) { + let is_sm = angle >> 9 & 0x1 as c_int; + let enable_intra_edge_filter = angle >> 10; + angle &= 511 as c_int; + if !(angle > 180) { + unreachable!(); + } + let mut dy = dav1d_dr_intra_derivative[(270 - angle >> 1) as usize] as c_int; + let mut left_out: [BD::Pixel; 128] = [0.into(); 128]; + let left: *const BD::Pixel; + let max_base_y; + let upsample_left = if enable_intra_edge_filter != 0 { + get_upsample(width + height, angle - 180, is_sm) + } else { + 0 as c_int + }; + if upsample_left != 0 { + upsample_edge::( + left_out.as_mut_ptr(), + width + height, + &*topleft_in.offset(-(width + height) as isize), + 
cmp::max(width - height, 0 as c_int), + width + height + 1, + bd, + ); + left = &mut *left_out + .as_mut_ptr() + .offset((2 * (width + height) - 2) as isize) as *mut BD::Pixel; + max_base_y = 2 * (width + height) - 2; + dy <<= 1; + } else { + let filter_strength = if enable_intra_edge_filter != 0 { + get_filter_strength(width + height, angle - 180, is_sm) + } else { + 0 as c_int + }; + if filter_strength != 0 { + filter_edge::( + left_out.as_mut_ptr(), + width + height, + 0 as c_int, + width + height, + &*topleft_in.offset(-(width + height) as isize), + cmp::max(width - height, 0 as c_int), + width + height + 1, + filter_strength, + ); + left = + &mut *left_out.as_mut_ptr().offset((width + height - 1) as isize) as *mut BD::Pixel; + max_base_y = width + height - 1; + } else { + left = &*topleft_in.offset(-(1 as c_int) as isize) as *const BD::Pixel; + max_base_y = height + cmp::min(width, height) - 1; + } + } + let base_inc = 1 + upsample_left; + let mut x = 0; + let mut ypos = dy; + while x < width { + let frac = ypos & 0x3e as c_int; + let mut y = 0; + let mut base = ypos >> 6; + while y < height { + if base < max_base_y { + let v = (*left.offset(-base as isize)).as_::() * (64 - frac) + + (*left.offset(-(base + 1) as isize)).as_::() * frac; + *dst.offset( + (y as isize * BD::pxstride(stride as usize) as isize + x as isize) as isize, + ) = (v + 32 >> 6).as_::(); + y += 1; + base += base_inc; + } else { + loop { + *dst.offset( + (y as isize * BD::pxstride(stride as usize) as isize + x as isize) as isize, + ) = *left.offset(-max_base_y as isize); + y += 1; + if !(y < height) { + break; + } + } + break; + } + } + x += 1; + ypos += dy; + } +} + // TODO(kkysen) Temporarily pub until mod is deduplicated pub(crate) unsafe fn filter_fn( flt_ptr: *const i8, diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index dd79444e2..e38691bca 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -4,9 +4,6 @@ use crate::include::common::bitdepth::BitDepth16; use 
crate::include::common::bitdepth::DynPixel; use crate::include::common::intops::iclip; use crate::include::dav1d::headers::Rav1dPixelLayout; -use crate::src::ipred::filter_edge; -use crate::src::ipred::get_filter_strength; -use crate::src::ipred::get_upsample; use crate::src::ipred::ipred_cfl_128_c_erased; use crate::src::ipred::ipred_cfl_c_erased; use crate::src::ipred::ipred_cfl_left_c_erased; @@ -23,7 +20,7 @@ use crate::src::ipred::ipred_smooth_v_c_erased; use crate::src::ipred::ipred_v_c_erased; use crate::src::ipred::ipred_z1_rust; use crate::src::ipred::ipred_z2_rust; -use crate::src::ipred::upsample_edge; +use crate::src::ipred::ipred_z3_rust; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; use crate::src::levels::DC_PRED; @@ -39,11 +36,9 @@ use crate::src::levels::VERT_PRED; use crate::src::levels::Z1_PRED; use crate::src::levels::Z2_PRED; use crate::src::levels::Z3_PRED; -use crate::src::tables::dav1d_dr_intra_derivative; use crate::src::tables::dav1d_filter_intra_taps; use libc::memcpy; use libc::ptrdiff_t; -use std::cmp; use std::ffi::c_int; use std::ffi::c_uint; use std::ffi::c_void; @@ -54,6 +49,15 @@ use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; #[cfg(feature = "asm")] use cfg_if::cfg_if; +#[cfg(all(feature = "asm", target_arch = "aarch64"))] +use std::cmp; + +#[cfg(all(feature = "asm", target_arch = "aarch64"))] +use crate::{ + src::ipred::get_filter_strength, src::ipred::get_upsample, + src::tables::dav1d_dr_intra_derivative, +}; + #[cfg(all(feature = "asm", target_arch = "aarch64"))] extern "C" { fn dav1d_ipred_z1_fill2_16bpc_neon( @@ -215,7 +219,7 @@ unsafe extern "C" fn ipred_z3_c_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_z3_rust( + ipred_z3_rust::( dst.cast(), stride, topleft_in.cast(), @@ -224,106 +228,10 @@ unsafe extern "C" fn ipred_z3_c_erased( angle, max_width, max_height, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); } -unsafe fn ipred_z3_rust( - dst: *mut pixel, - 
stride: ptrdiff_t, - topleft_in: *const pixel, - width: c_int, - height: c_int, - mut angle: c_int, - _max_width: c_int, - _max_height: c_int, - bitdepth_max: c_int, -) { - let is_sm = angle >> 9 & 0x1 as c_int; - let enable_intra_edge_filter = angle >> 10; - angle &= 511 as c_int; - if !(angle > 180) { - unreachable!(); - } - let mut dy = dav1d_dr_intra_derivative[(270 - angle >> 1) as usize] as c_int; - let mut left_out: [pixel; 128] = [0; 128]; - let left: *const pixel; - let max_base_y; - let upsample_left = if enable_intra_edge_filter != 0 { - get_upsample(width + height, angle - 180, is_sm) - } else { - 0 as c_int - }; - if upsample_left != 0 { - upsample_edge::( - left_out.as_mut_ptr(), - width + height, - &*topleft_in.offset(-(width + height) as isize), - cmp::max(width - height, 0 as c_int), - width + height + 1, - BitDepth16::from_c(bitdepth_max), - ); - left = &mut *left_out - .as_mut_ptr() - .offset((2 * (width + height) - 2) as isize) as *mut pixel; - max_base_y = 2 * (width + height) - 2; - dy <<= 1; - } else { - let filter_strength = if enable_intra_edge_filter != 0 { - get_filter_strength(width + height, angle - 180, is_sm) - } else { - 0 as c_int - }; - if filter_strength != 0 { - filter_edge::( - left_out.as_mut_ptr(), - width + height, - 0 as c_int, - width + height, - &*topleft_in.offset(-(width + height) as isize), - cmp::max(width - height, 0 as c_int), - width + height + 1, - filter_strength, - ); - left = &mut *left_out.as_mut_ptr().offset((width + height - 1) as isize) as *mut pixel; - max_base_y = width + height - 1; - } else { - left = &*topleft_in.offset(-(1 as c_int) as isize) as *const pixel; - max_base_y = height + cmp::min(width, height) - 1; - } - } - let base_inc = 1 + upsample_left; - let mut x = 0; - let mut ypos = dy; - while x < width { - let frac = ypos & 0x3e as c_int; - let mut y = 0; - let mut base = ypos >> 6; - while y < height { - if base < max_base_y { - let v = *left.offset(-base as isize) as c_int * (64 - frac) - + 
*left.offset(-(base + 1) as isize) as c_int * frac; - *dst.offset((y as isize * PXSTRIDE(stride) + x as isize) as isize) = - (v + 32 >> 6) as pixel; - y += 1; - base += base_inc; - } else { - loop { - *dst.offset((y as isize * PXSTRIDE(stride) + x as isize) as isize) = - *left.offset(-max_base_y as isize); - y += 1; - if !(y < height) { - break; - } - } - break; - } - } - x += 1; - ypos += dy; - } -} - unsafe extern "C" fn ipred_filter_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 43232001c..1f7f7a115 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -4,9 +4,6 @@ use crate::include::common::bitdepth::BitDepth8; use crate::include::common::bitdepth::DynPixel; use crate::include::common::intops::iclip_u8; use crate::include::dav1d::headers::Rav1dPixelLayout; -use crate::src::ipred::filter_edge; -use crate::src::ipred::get_filter_strength; -use crate::src::ipred::get_upsample; use crate::src::ipred::ipred_cfl_128_c_erased; use crate::src::ipred::ipred_cfl_c_erased; use crate::src::ipred::ipred_cfl_left_c_erased; @@ -23,7 +20,7 @@ use crate::src::ipred::ipred_smooth_v_c_erased; use crate::src::ipred::ipred_v_c_erased; use crate::src::ipred::ipred_z1_rust; use crate::src::ipred::ipred_z2_rust; -use crate::src::ipred::upsample_edge; +use crate::src::ipred::ipred_z3_rust; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; use crate::src::levels::DC_PRED; @@ -39,11 +36,9 @@ use crate::src::levels::VERT_PRED; use crate::src::levels::Z1_PRED; use crate::src::levels::Z2_PRED; use crate::src::levels::Z3_PRED; -use crate::src::tables::dav1d_dr_intra_derivative; use crate::src::tables::dav1d_filter_intra_taps; use libc::memcpy; use libc::ptrdiff_t; -use std::cmp; use std::ffi::c_int; use std::ffi::c_uint; use std::ffi::c_void; @@ -54,6 +49,15 @@ use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; #[cfg(feature = "asm")] use cfg_if::cfg_if; +#[cfg(all(feature = "asm", 
target_arch = "aarch64"))] +use std::cmp; + +#[cfg(all(feature = "asm", target_arch = "aarch64"))] +use crate::{ + src::ipred::get_filter_strength, src::ipred::get_upsample, + src::tables::dav1d_dr_intra_derivative, +}; + #[cfg(all(feature = "asm", target_arch = "aarch64"))] extern "C" { fn dav1d_ipred_z1_fill2_8bpc_neon( @@ -201,7 +205,7 @@ unsafe extern "C" fn ipred_z3_c_erased( max_height: c_int, _bitdepth_max: c_int, ) { - ipred_z3_rust( + ipred_z3_rust::( dst.cast(), stride, topleft_in.cast(), @@ -210,103 +214,10 @@ unsafe extern "C" fn ipred_z3_c_erased( angle, max_width, max_height, + BitDepth8::new(()), ); } -unsafe fn ipred_z3_rust( - dst: *mut pixel, - stride: ptrdiff_t, - topleft_in: *const pixel, - width: c_int, - height: c_int, - mut angle: c_int, - _max_width: c_int, - _max_height: c_int, -) { - let is_sm = angle >> 9 & 0x1 as c_int; - let enable_intra_edge_filter = angle >> 10; - angle &= 511 as c_int; - if !(angle > 180) { - unreachable!(); - } - let mut dy = dav1d_dr_intra_derivative[(270 - angle >> 1) as usize] as c_int; - let mut left_out: [pixel; 128] = [0; 128]; - let left: *const pixel; - let max_base_y; - let upsample_left = if enable_intra_edge_filter != 0 { - get_upsample(width + height, angle - 180, is_sm) - } else { - 0 as c_int - }; - if upsample_left != 0 { - upsample_edge::( - left_out.as_mut_ptr(), - width + height, - &*topleft_in.offset(-(width + height) as isize), - cmp::max(width - height, 0 as c_int), - width + height + 1, - BitDepth8::new(()), - ); - left = &mut *left_out - .as_mut_ptr() - .offset((2 * (width + height) - 2) as isize) as *mut pixel; - max_base_y = 2 * (width + height) - 2; - dy <<= 1; - } else { - let filter_strength = if enable_intra_edge_filter != 0 { - get_filter_strength(width + height, angle - 180, is_sm) - } else { - 0 as c_int - }; - if filter_strength != 0 { - filter_edge::( - left_out.as_mut_ptr(), - width + height, - 0 as c_int, - width + height, - &*topleft_in.offset(-(width + height) as isize), - 
cmp::max(width - height, 0 as c_int), - width + height + 1, - filter_strength, - ); - left = &mut *left_out.as_mut_ptr().offset((width + height - 1) as isize) as *mut pixel; - max_base_y = width + height - 1; - } else { - left = &*topleft_in.offset(-(1 as c_int) as isize) as *const pixel; - max_base_y = height + cmp::min(width, height) - 1; - } - } - let base_inc = 1 + upsample_left; - let mut x = 0; - let mut ypos = dy; - while x < width { - let frac = ypos & 0x3e as c_int; - let mut y = 0; - let mut base = ypos >> 6; - while y < height { - if base < max_base_y { - let v = *left.offset(-base as isize) as c_int * (64 - frac) - + *left.offset(-(base + 1) as isize) as c_int * frac; - *dst.offset((y as isize * stride + x as isize) as isize) = (v + 32 >> 6) as pixel; - y += 1; - base += base_inc; - } else { - loop { - *dst.offset((y as isize * stride + x as isize) as isize) = - *left.offset(-max_base_y as isize); - y += 1; - if !(y < height) { - break; - } - } - break; - } - } - x += 1; - ypos += dy; - } -} - unsafe extern "C" fn ipred_filter_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, From 29e7d063106866a4e1c803db6476d9ab7b1b0f8d Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 16 Nov 2023 07:04:13 -0800 Subject: [PATCH 31/49] `fn ipred_filter_rust`: Deduplicate w/ generics. 
--- src/ipred.rs | 60 +++++++++++++++++++++++++++++++++++++++ src/ipred_tmpl_16.rs | 67 ++------------------------------------------ src/ipred_tmpl_8.rs | 64 ++---------------------------------------- 3 files changed, 66 insertions(+), 125 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 8eb89836f..2fd309be1 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -6,6 +6,7 @@ use crate::include::common::bitdepth::BPC; use crate::include::common::intops::apply_sign; use crate::include::common::intops::iclip; use crate::src::tables::dav1d_dr_intra_derivative; +use crate::src::tables::dav1d_filter_intra_taps; use crate::src::tables::dav1d_sm_weights; use cfg_if::cfg_if; use libc::ptrdiff_t; @@ -1298,3 +1299,62 @@ cfg_if! { pub(crate) const FLT_INCR: isize = 1; } } + +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe fn ipred_filter_rust( + mut dst: *mut BD::Pixel, + stride: ptrdiff_t, + topleft_in: *const BD::Pixel, + width: c_int, + height: c_int, + mut filt_idx: c_int, + _max_width: c_int, + _max_height: c_int, + bd: BD, +) { + filt_idx &= 511 as c_int; + if !(filt_idx < 5) { + unreachable!(); + } + let filter: *const i8 = (dav1d_filter_intra_taps[filt_idx as usize]).as_ptr(); + let mut top: *const BD::Pixel = &*topleft_in.offset(1) as *const BD::Pixel; + let mut y = 0; + while y < height { + let mut topleft: *const BD::Pixel = &*topleft_in.offset(-y as isize) as *const BD::Pixel; + let mut left: *const BD::Pixel = + &*topleft.offset(-(1 as c_int) as isize) as *const BD::Pixel; + let mut left_stride: ptrdiff_t = -(1 as c_int) as ptrdiff_t; + let mut x = 0; + while x < width { + let p0 = (*topleft).as_::(); + let p1 = (*top.offset(0)).as_::(); + let p2 = (*top.offset(1)).as_::(); + let p3 = (*top.offset(2)).as_::(); + let p4 = (*top.offset(3)).as_::(); + let p5 = (*left.offset((0 * left_stride) as isize)).as_::(); + let p6 = (*left.offset((1 * left_stride) as isize)).as_::(); + let mut ptr: *mut BD::Pixel = &mut *dst.offset(x as isize) 
as *mut BD::Pixel; + let mut flt_ptr: *const i8 = filter; + let mut yy = 0; + while yy < 2 { + let mut xx = 0; + while xx < 4 { + let acc = filter_fn(flt_ptr, p0, p1, p2, p3, p4, p5, p6); + *ptr.offset(xx as isize) = bd.iclip_pixel(acc + 8 >> 4); + xx += 1; + flt_ptr = flt_ptr.offset(FLT_INCR); + } + ptr = ptr.offset(BD::pxstride(stride as usize) as isize); + yy += 1; + } + left = &mut *dst.offset((x + 4 - 1) as isize) as *mut BD::Pixel; + left_stride = BD::pxstride(stride as usize) as isize; + top = top.offset(4); + topleft = &*top.offset(-(1 as c_int) as isize) as *const BD::Pixel; + x += 4 as c_int; + } + top = &mut *dst.offset(BD::pxstride(stride as usize) as isize) as *mut BD::Pixel; + dst = &mut *dst.offset((BD::pxstride(stride as usize) * 2) as isize) as *mut BD::Pixel; + y += 2 as c_int; + } +} diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index e38691bca..50b2b1550 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -2,7 +2,6 @@ use crate::include::common::attributes::ctz; use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::BitDepth16; use crate::include::common::bitdepth::DynPixel; -use crate::include::common::intops::iclip; use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::src::ipred::ipred_cfl_128_c_erased; use crate::src::ipred::ipred_cfl_c_erased; @@ -12,6 +11,7 @@ use crate::src::ipred::ipred_dc_128_c_erased; use crate::src::ipred::ipred_dc_c_erased; use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; +use crate::src::ipred::ipred_filter_rust; use crate::src::ipred::ipred_h_c_erased; use crate::src::ipred::ipred_paeth_c_erased; use crate::src::ipred::ipred_smooth_c_erased; @@ -36,7 +36,6 @@ use crate::src::levels::VERT_PRED; use crate::src::levels::Z1_PRED; use crate::src::levels::Z2_PRED; use crate::src::levels::Z3_PRED; -use crate::src::tables::dav1d_filter_intra_taps; use libc::memcpy; use libc::ptrdiff_t; use std::ffi::c_int; @@ -243,7 
+242,7 @@ unsafe extern "C" fn ipred_filter_c_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_filter_rust( + ipred_filter_rust::( dst.cast(), stride, topleft_in.cast(), @@ -252,70 +251,10 @@ unsafe extern "C" fn ipred_filter_c_erased( filt_idx, max_width, max_height, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); } -unsafe fn ipred_filter_rust( - mut dst: *mut pixel, - stride: ptrdiff_t, - topleft_in: *const pixel, - width: c_int, - height: c_int, - mut filt_idx: c_int, - _max_width: c_int, - _max_height: c_int, - bitdepth_max: c_int, -) { - use crate::src::ipred::{filter_fn, FLT_INCR}; - - filt_idx &= 511 as c_int; - if !(filt_idx < 5) { - unreachable!(); - } - let filter: *const i8 = (dav1d_filter_intra_taps[filt_idx as usize]).as_ptr(); - let mut top: *const pixel = &*topleft_in.offset(1) as *const pixel; - let mut y = 0; - while y < height { - let mut topleft: *const pixel = &*topleft_in.offset(-y as isize) as *const pixel; - let mut left: *const pixel = &*topleft.offset(-(1 as c_int) as isize) as *const pixel; - let mut left_stride: ptrdiff_t = -(1 as c_int) as ptrdiff_t; - let mut x = 0; - while x < width { - let p0 = *topleft as c_int; - let p1 = *top.offset(0) as c_int; - let p2 = *top.offset(1) as c_int; - let p3 = *top.offset(2) as c_int; - let p4 = *top.offset(3) as c_int; - let p5 = *left.offset((0 * left_stride) as isize) as c_int; - let p6 = *left.offset((1 * left_stride) as isize) as c_int; - let mut ptr: *mut pixel = &mut *dst.offset(x as isize) as *mut pixel; - let mut flt_ptr: *const i8 = filter; - let mut yy = 0; - while yy < 2 { - let mut xx = 0; - while xx < 4 { - let acc = filter_fn(flt_ptr, p0, p1, p2, p3, p4, p5, p6); - *ptr.offset(xx as isize) = - iclip(acc + 8 >> 4, 0 as c_int, bitdepth_max) as pixel; - xx += 1; - flt_ptr = flt_ptr.offset(FLT_INCR); - } - ptr = ptr.offset(PXSTRIDE(stride) as isize); - yy += 1; - } - left = &mut *dst.offset((x + 4 - 1) as isize) as *mut pixel; - left_stride = PXSTRIDE(stride); - top = 
top.offset(4); - topleft = &*top.offset(-(1 as c_int) as isize) as *const pixel; - x += 4 as c_int; - } - top = &mut *dst.offset((PXSTRIDE)(stride) as isize) as *mut pixel; - dst = &mut *dst.offset(((PXSTRIDE)(stride) * 2) as isize) as *mut pixel; - y += 2 as c_int; - } -} - #[inline(never)] unsafe fn cfl_ac_c( mut ac: *mut i16, diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 1f7f7a115..eb6b65088 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -2,7 +2,6 @@ use crate::include::common::attributes::ctz; use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::BitDepth8; use crate::include::common::bitdepth::DynPixel; -use crate::include::common::intops::iclip_u8; use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::src::ipred::ipred_cfl_128_c_erased; use crate::src::ipred::ipred_cfl_c_erased; @@ -12,6 +11,7 @@ use crate::src::ipred::ipred_dc_128_c_erased; use crate::src::ipred::ipred_dc_c_erased; use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; +use crate::src::ipred::ipred_filter_rust; use crate::src::ipred::ipred_h_c_erased; use crate::src::ipred::ipred_paeth_c_erased; use crate::src::ipred::ipred_smooth_c_erased; @@ -36,7 +36,6 @@ use crate::src::levels::VERT_PRED; use crate::src::levels::Z1_PRED; use crate::src::levels::Z2_PRED; use crate::src::levels::Z3_PRED; -use crate::src::tables::dav1d_filter_intra_taps; use libc::memcpy; use libc::ptrdiff_t; use std::ffi::c_int; @@ -229,7 +228,7 @@ unsafe extern "C" fn ipred_filter_c_erased( max_height: c_int, _bitdepth_max: c_int, ) { - ipred_filter_rust( + ipred_filter_rust::( dst.cast(), stride, topleft_in.cast(), @@ -238,67 +237,10 @@ unsafe extern "C" fn ipred_filter_c_erased( filt_idx, max_width, max_height, + BitDepth8::new(()), ); } -unsafe fn ipred_filter_rust( - mut dst: *mut pixel, - stride: ptrdiff_t, - topleft_in: *const pixel, - width: c_int, - height: c_int, - mut filt_idx: c_int, - _max_width: 
c_int, - _max_height: c_int, -) { - use crate::src::ipred::{filter_fn, FLT_INCR}; - - filt_idx &= 511 as c_int; - if !(filt_idx < 5) { - unreachable!(); - } - let filter: *const i8 = (dav1d_filter_intra_taps[filt_idx as usize]).as_ptr(); - let mut top: *const pixel = &*topleft_in.offset(1) as *const pixel; - let mut y = 0; - while y < height { - let mut topleft: *const pixel = &*topleft_in.offset(-y as isize) as *const pixel; - let mut left: *const pixel = &*topleft.offset(-(1 as c_int) as isize) as *const pixel; - let mut left_stride: ptrdiff_t = -(1 as c_int) as ptrdiff_t; - let mut x = 0; - while x < width { - let p0 = *topleft as c_int; - let p1 = *top.offset(0) as c_int; - let p2 = *top.offset(1) as c_int; - let p3 = *top.offset(2) as c_int; - let p4 = *top.offset(3) as c_int; - let p5 = *left.offset((0 * left_stride) as isize) as c_int; - let p6 = *left.offset((1 * left_stride) as isize) as c_int; - let mut ptr: *mut pixel = &mut *dst.offset(x as isize) as *mut pixel; - let mut flt_ptr: *const i8 = filter; - let mut yy = 0; - while yy < 2 { - let mut xx = 0; - while xx < 4 { - let acc = filter_fn(flt_ptr, p0, p1, p2, p3, p4, p5, p6); - *ptr.offset(xx as isize) = iclip_u8(acc + 8 >> 4) as pixel; - xx += 1; - flt_ptr = flt_ptr.offset(FLT_INCR); - } - ptr = ptr.offset(stride as isize); - yy += 1; - } - left = &mut *dst.offset((x + 4 - 1) as isize) as *mut pixel; - left_stride = stride; - top = top.offset(4); - topleft = &*top.offset(-(1 as c_int) as isize) as *const pixel; - x += 4 as c_int; - } - top = &mut *dst.offset(stride as isize) as *mut pixel; - dst = &mut *dst.offset((stride * 2) as isize) as *mut pixel; - y += 2 as c_int; - } -} - #[inline(never)] unsafe fn cfl_ac_c( mut ac: *mut i16, From be1dd72833cf77975b770c3f7296d48ca840100f Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 16 Nov 2023 10:48:33 -0800 Subject: [PATCH 32/49] `fn cfl_ac_{c => rust}`: Rename like the others. 
--- src/ipred_tmpl_16.rs | 8 ++++---- src/ipred_tmpl_8.rs | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 50b2b1550..0433cc5fa 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -256,7 +256,7 @@ unsafe extern "C" fn ipred_filter_c_erased( } #[inline(never)] -unsafe fn cfl_ac_c( +unsafe fn cfl_ac_rust( mut ac: *mut i16, mut ypx: *const pixel, stride: ptrdiff_t, @@ -350,7 +350,7 @@ unsafe extern "C" fn cfl_ac_420_c_erased( cw: c_int, ch: c_int, ) { - cfl_ac_c( + cfl_ac_rust( ac, ypx.cast(), stride, @@ -372,7 +372,7 @@ unsafe extern "C" fn cfl_ac_422_c_erased( cw: c_int, ch: c_int, ) { - cfl_ac_c( + cfl_ac_rust( ac, ypx.cast(), stride, @@ -394,7 +394,7 @@ unsafe extern "C" fn cfl_ac_444_c_erased( cw: c_int, ch: c_int, ) { - cfl_ac_c( + cfl_ac_rust( ac, ypx.cast(), stride, diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index eb6b65088..744d9cc3a 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -242,7 +242,7 @@ unsafe extern "C" fn ipred_filter_c_erased( } #[inline(never)] -unsafe fn cfl_ac_c( +unsafe fn cfl_ac_rust( mut ac: *mut i16, mut ypx: *const pixel, stride: ptrdiff_t, @@ -334,7 +334,7 @@ unsafe extern "C" fn cfl_ac_420_c_erased( cw: c_int, ch: c_int, ) { - cfl_ac_c( + cfl_ac_rust( ac, ypx.cast(), stride, @@ -356,7 +356,7 @@ unsafe extern "C" fn cfl_ac_422_c_erased( cw: c_int, ch: c_int, ) { - cfl_ac_c( + cfl_ac_rust( ac, ypx.cast(), stride, @@ -378,7 +378,7 @@ unsafe extern "C" fn cfl_ac_444_c_erased( cw: c_int, ch: c_int, ) { - cfl_ac_c( + cfl_ac_rust( ac, ypx.cast(), stride, From 1348084376846cc1f3f6517fbbe1025494d47ad0 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 16 Nov 2023 11:01:19 -0800 Subject: [PATCH 33/49] `fn cfl_ac_rust`: Deduplicate w/ generics. 
--- src/ipred.rs | 93 +++++++++++++++++++++++++++++++++++++++++++ src/ipred_tmpl_16.rs | 95 ++------------------------------------------ src/ipred_tmpl_8.rs | 93 ++----------------------------------------- 3 files changed, 101 insertions(+), 180 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 2fd309be1..787b6c8b6 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -9,12 +9,14 @@ use crate::src::tables::dav1d_dr_intra_derivative; use crate::src::tables::dav1d_filter_intra_taps; use crate::src::tables::dav1d_sm_weights; use cfg_if::cfg_if; +use libc::memcpy; use libc::ptrdiff_t; use std::cmp; use std::ffi::c_int; use std::ffi::c_uint; use std::ffi::c_ulong; use std::ffi::c_ulonglong; +use std::ffi::c_void; use std::slice; pub type angular_ipred_fn = unsafe extern "C" fn( @@ -1358,3 +1360,94 @@ pub(crate) unsafe fn ipred_filter_rust( y += 2 as c_int; } } + +// TODO(kkysen) Temporarily pub until mod is deduplicated +#[inline(never)] +pub(crate) unsafe fn cfl_ac_rust( + mut ac: *mut i16, + mut ypx: *const BD::Pixel, + stride: ptrdiff_t, + w_pad: c_int, + h_pad: c_int, + width: c_int, + height: c_int, + ss_hor: c_int, + ss_ver: c_int, +) { + let mut y; + let mut x: i32; + let ac_orig: *mut i16 = ac; + if !(w_pad >= 0 && (w_pad * 4) < width) { + unreachable!(); + } + if !(h_pad >= 0 && (h_pad * 4) < height) { + unreachable!(); + } + y = 0 as c_int; + while y < height - 4 * h_pad { + x = 0 as c_int; + while x < width - 4 * w_pad { + let mut ac_sum = (*ypx.offset((x << ss_hor) as isize)).as_::(); + if ss_hor != 0 { + ac_sum += (*ypx.offset((x * 2 + 1) as isize)).as_::(); + } + if ss_ver != 0 { + ac_sum += (*ypx.offset( + ((x << ss_hor) as isize + BD::pxstride(stride as usize) as isize) as isize, + )) + .as_::(); + if ss_hor != 0 { + ac_sum += (*ypx.offset( + ((x * 2 + 1) as isize + BD::pxstride(stride as usize) as isize) as isize, + )) + .as_::(); + } + } + *ac.offset(x as isize) = + (ac_sum << 1 + (ss_ver == 0) as c_int + (ss_hor == 0) as c_int) as i16; + x += 1; + 
} + while x < width { + *ac.offset(x as isize) = *ac.offset((x - 1) as isize); + x += 1; + } + ac = ac.offset(width as isize); + ypx = ypx.offset((BD::pxstride(stride as usize) << ss_ver) as isize); + y += 1; + } + while y < height { + memcpy( + ac as *mut c_void, + &mut *ac.offset(-width as isize) as *mut i16 as *const c_void, + (width as usize).wrapping_mul(::core::mem::size_of::()), + ); + ac = ac.offset(width as isize); + y += 1; + } + let log2sz = ctz(width as c_uint) + ctz(height as c_uint); + let mut sum = (1 as c_int) << log2sz >> 1; + ac = ac_orig; + y = 0 as c_int; + while y < height { + x = 0 as c_int; + while x < width { + sum += *ac.offset(x as isize) as c_int; + x += 1; + } + ac = ac.offset(width as isize); + y += 1; + } + sum >>= log2sz; + ac = ac_orig; + y = 0 as c_int; + while y < height { + x = 0 as c_int; + while x < width { + let ref mut fresh0 = *ac.offset(x as isize); + *fresh0 = (*fresh0 as c_int - sum) as i16; + x += 1; + } + ac = ac.offset(width as isize); + y += 1; + } +} diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 0433cc5fa..eafc61541 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -1,8 +1,8 @@ -use crate::include::common::attributes::ctz; use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::BitDepth16; use crate::include::common::bitdepth::DynPixel; use crate::include::dav1d::headers::Rav1dPixelLayout; +use crate::src::ipred::cfl_ac_rust; use crate::src::ipred::ipred_cfl_128_c_erased; use crate::src::ipred::ipred_cfl_c_erased; use crate::src::ipred::ipred_cfl_left_c_erased; @@ -39,7 +39,6 @@ use crate::src::levels::Z3_PRED; use libc::memcpy; use libc::ptrdiff_t; use std::ffi::c_int; -use std::ffi::c_uint; use std::ffi::c_void; #[cfg(feature = "asm")] @@ -255,92 +254,6 @@ unsafe extern "C" fn ipred_filter_c_erased( ); } -#[inline(never)] -unsafe fn cfl_ac_rust( - mut ac: *mut i16, - mut ypx: *const pixel, - stride: ptrdiff_t, - w_pad: c_int, - h_pad: c_int, - width: 
c_int, - height: c_int, - ss_hor: c_int, - ss_ver: c_int, -) { - let mut y; - let mut x: i32; - let ac_orig: *mut i16 = ac; - if !(w_pad >= 0 && (w_pad * 4) < width) { - unreachable!(); - } - if !(h_pad >= 0 && (h_pad * 4) < height) { - unreachable!(); - } - y = 0 as c_int; - while y < height - 4 * h_pad { - x = 0 as c_int; - while x < width - 4 * w_pad { - let mut ac_sum = *ypx.offset((x << ss_hor) as isize) as c_int; - if ss_hor != 0 { - ac_sum += *ypx.offset((x * 2 + 1) as isize) as c_int; - } - if ss_ver != 0 { - ac_sum += - *ypx.offset(((x << ss_hor) as isize + PXSTRIDE(stride)) as isize) as c_int; - if ss_hor != 0 { - ac_sum += - *ypx.offset(((x * 2 + 1) as isize + PXSTRIDE(stride)) as isize) as c_int; - } - } - *ac.offset(x as isize) = - (ac_sum << 1 + (ss_ver == 0) as c_int + (ss_hor == 0) as c_int) as i16; - x += 1; - } - while x < width { - *ac.offset(x as isize) = *ac.offset((x - 1) as isize); - x += 1; - } - ac = ac.offset(width as isize); - ypx = ypx.offset((PXSTRIDE(stride) << ss_ver) as isize); - y += 1; - } - while y < height { - memcpy( - ac as *mut c_void, - &mut *ac.offset(-width as isize) as *mut i16 as *const c_void, - (width as usize).wrapping_mul(::core::mem::size_of::()), - ); - ac = ac.offset(width as isize); - y += 1; - } - let log2sz = ctz(width as c_uint) + ctz(height as c_uint); - let mut sum = (1 as c_int) << log2sz >> 1; - ac = ac_orig; - y = 0 as c_int; - while y < height { - x = 0 as c_int; - while x < width { - sum += *ac.offset(x as isize) as c_int; - x += 1; - } - ac = ac.offset(width as isize); - y += 1; - } - sum >>= log2sz; - ac = ac_orig; - y = 0 as c_int; - while y < height { - x = 0 as c_int; - while x < width { - let ref mut fresh0 = *ac.offset(x as isize); - *fresh0 = (*fresh0 as c_int - sum) as i16; - x += 1; - } - ac = ac.offset(width as isize); - y += 1; - } -} - unsafe extern "C" fn cfl_ac_420_c_erased( ac: *mut i16, ypx: *const DynPixel, @@ -350,7 +263,7 @@ unsafe extern "C" fn cfl_ac_420_c_erased( cw: c_int, ch: 
c_int, ) { - cfl_ac_rust( + cfl_ac_rust::( ac, ypx.cast(), stride, @@ -372,7 +285,7 @@ unsafe extern "C" fn cfl_ac_422_c_erased( cw: c_int, ch: c_int, ) { - cfl_ac_rust( + cfl_ac_rust::( ac, ypx.cast(), stride, @@ -394,7 +307,7 @@ unsafe extern "C" fn cfl_ac_444_c_erased( cw: c_int, ch: c_int, ) { - cfl_ac_rust( + cfl_ac_rust::( ac, ypx.cast(), stride, diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 744d9cc3a..8a1d07e77 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -1,8 +1,8 @@ -use crate::include::common::attributes::ctz; use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::BitDepth8; use crate::include::common::bitdepth::DynPixel; use crate::include::dav1d::headers::Rav1dPixelLayout; +use crate::src::ipred::cfl_ac_rust; use crate::src::ipred::ipred_cfl_128_c_erased; use crate::src::ipred::ipred_cfl_c_erased; use crate::src::ipred::ipred_cfl_left_c_erased; @@ -39,7 +39,6 @@ use crate::src::levels::Z3_PRED; use libc::memcpy; use libc::ptrdiff_t; use std::ffi::c_int; -use std::ffi::c_uint; use std::ffi::c_void; #[cfg(feature = "asm")] @@ -241,90 +240,6 @@ unsafe extern "C" fn ipred_filter_c_erased( ); } -#[inline(never)] -unsafe fn cfl_ac_rust( - mut ac: *mut i16, - mut ypx: *const pixel, - stride: ptrdiff_t, - w_pad: c_int, - h_pad: c_int, - width: c_int, - height: c_int, - ss_hor: c_int, - ss_ver: c_int, -) { - let mut y; - let mut x; - let ac_orig: *mut i16 = ac; - if !(w_pad >= 0 && (w_pad * 4) < width) { - unreachable!(); - } - if !(h_pad >= 0 && (h_pad * 4) < height) { - unreachable!(); - } - y = 0 as c_int; - while y < height - 4 * h_pad { - x = 0 as c_int; - while x < width - 4 * w_pad { - let mut ac_sum = *ypx.offset((x << ss_hor) as isize) as c_int; - if ss_hor != 0 { - ac_sum += *ypx.offset((x * 2 + 1) as isize) as c_int; - } - if ss_ver != 0 { - ac_sum += *ypx.offset(((x << ss_hor) as isize + stride) as isize) as c_int; - if ss_hor != 0 { - ac_sum += *ypx.offset(((x * 2 + 1) as isize + stride) 
as isize) as c_int; - } - } - *ac.offset(x as isize) = - (ac_sum << 1 + (ss_ver == 0) as c_int + (ss_hor == 0) as c_int) as i16; - x += 1; - } - while x < width { - *ac.offset(x as isize) = *ac.offset((x - 1) as isize); - x += 1; - } - ac = ac.offset(width as isize); - ypx = ypx.offset((stride << ss_ver) as isize); - y += 1; - } - while y < height { - memcpy( - ac as *mut c_void, - &mut *ac.offset(-width as isize) as *mut i16 as *const c_void, - (width as usize).wrapping_mul(::core::mem::size_of::()), - ); - ac = ac.offset(width as isize); - y += 1; - } - let log2sz = ctz(width as c_uint) + ctz(height as c_uint); - let mut sum = (1 as c_int) << log2sz >> 1; - ac = ac_orig; - y = 0 as c_int; - while y < height { - x = 0 as c_int; - while x < width { - sum += *ac.offset(x as isize) as c_int; - x += 1; - } - ac = ac.offset(width as isize); - y += 1; - } - sum >>= log2sz; - ac = ac_orig; - y = 0 as c_int; - while y < height { - x = 0 as c_int; - while x < width { - let ref mut fresh0 = *ac.offset(x as isize); - *fresh0 = (*fresh0 as c_int - sum) as i16; - x += 1; - } - ac = ac.offset(width as isize); - y += 1; - } -} - unsafe extern "C" fn cfl_ac_420_c_erased( ac: *mut i16, ypx: *const DynPixel, @@ -334,7 +249,7 @@ unsafe extern "C" fn cfl_ac_420_c_erased( cw: c_int, ch: c_int, ) { - cfl_ac_rust( + cfl_ac_rust::( ac, ypx.cast(), stride, @@ -356,7 +271,7 @@ unsafe extern "C" fn cfl_ac_422_c_erased( cw: c_int, ch: c_int, ) { - cfl_ac_rust( + cfl_ac_rust::( ac, ypx.cast(), stride, @@ -378,7 +293,7 @@ unsafe extern "C" fn cfl_ac_444_c_erased( cw: c_int, ch: c_int, ) { - cfl_ac_rust( + cfl_ac_rust::( ac, ypx.cast(), stride, From d83c3bc94d0816a465c066c060d4f8f8cd01adcd Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 16 Nov 2023 11:05:33 -0800 Subject: [PATCH 34/49] `fn pal_pred_rust`: Deduplicate w/ generics. 
--- include/common/bitdepth.rs | 1 + src/ipred.rs | 23 +++++++++++++++++++++++ src/ipred_tmpl_16.rs | 32 ++------------------------------ src/ipred_tmpl_8.rs | 24 ++---------------------- 4 files changed, 28 insertions(+), 52 deletions(-) diff --git a/include/common/bitdepth.rs b/include/common/bitdepth.rs index 610de3c7d..b3bbff866 100644 --- a/include/common/bitdepth.rs +++ b/include/common/bitdepth.rs @@ -98,6 +98,7 @@ pub trait BitDepth: Clone + Copy { + Into + Into + TryFrom + + FromPrimitive + FromPrimitive + FromPrimitive + ToPrimitive diff --git a/src/ipred.rs b/src/ipred.rs index 787b6c8b6..8cfb84cb3 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -1451,3 +1451,26 @@ pub(crate) unsafe fn cfl_ac_rust( y += 1; } } + +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe fn pal_pred_rust( + mut dst: *mut BD::Pixel, + stride: ptrdiff_t, + pal: *const u16, + mut idx: *const u8, + w: c_int, + h: c_int, +) { + let mut y = 0; + while y < h { + let mut x = 0; + while x < w { + *dst.offset(x as isize) = + (*pal.offset(*idx.offset(x as isize) as isize)).as_::(); + x += 1; + } + idx = idx.offset(w as isize); + dst = dst.offset(BD::pxstride(stride as usize) as isize); + y += 1; + } +} diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index eafc61541..fa1bd205a 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -21,6 +21,7 @@ use crate::src::ipred::ipred_v_c_erased; use crate::src::ipred::ipred_z1_rust; use crate::src::ipred::ipred_z2_rust; use crate::src::ipred::ipred_z3_rust; +use crate::src::ipred::pal_pred_rust; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; use crate::src::levels::DC_PRED; @@ -150,14 +151,6 @@ extern "C" { pub type pixel = u16; -#[inline] -unsafe fn PXSTRIDE(x: ptrdiff_t) -> ptrdiff_t { - if x & 1 != 0 { - unreachable!(); - } - return x >> 1; -} - unsafe extern "C" fn ipred_z1_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -328,28 +321,7 @@ unsafe extern "C" fn 
pal_pred_c_erased( w: c_int, h: c_int, ) { - pal_pred_rust(dst.cast(), stride, pal, idx, w, h); -} - -unsafe fn pal_pred_rust( - mut dst: *mut pixel, - stride: ptrdiff_t, - pal: *const u16, - mut idx: *const u8, - w: c_int, - h: c_int, -) { - let mut y = 0; - while y < h { - let mut x = 0; - while x < w { - *dst.offset(x as isize) = *pal.offset(*idx.offset(x as isize) as isize); - x += 1; - } - idx = idx.offset(w as isize); - dst = dst.offset(PXSTRIDE(stride) as isize); - y += 1; - } + pal_pred_rust::(dst.cast(), stride, pal, idx, w, h); } #[cfg(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64"),))] diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 8a1d07e77..c3290c784 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -21,6 +21,7 @@ use crate::src::ipred::ipred_v_c_erased; use crate::src::ipred::ipred_z1_rust; use crate::src::ipred::ipred_z2_rust; use crate::src::ipred::ipred_z3_rust; +use crate::src::ipred::pal_pred_rust; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; use crate::src::levels::DC_PRED; @@ -314,28 +315,7 @@ unsafe extern "C" fn pal_pred_c_erased( w: c_int, h: c_int, ) { - pal_pred_rust(dst.cast(), stride, pal, idx, w, h); -} - -unsafe fn pal_pred_rust( - mut dst: *mut pixel, - stride: ptrdiff_t, - pal: *const u16, - mut idx: *const u8, - w: c_int, - h: c_int, -) { - let mut y = 0; - while y < h { - let mut x = 0; - while x < w { - *dst.offset(x as isize) = *pal.offset(*idx.offset(x as isize) as isize) as pixel; - x += 1; - } - idx = idx.offset(w as isize); - dst = dst.offset(stride as isize); - y += 1; - } + pal_pred_rust::(dst.cast(), stride, pal, idx, w, h); } #[cfg(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64"),))] From 0a6f530151b21eeb411c171bac4fdf19c826f097 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 16 Nov 2023 11:27:01 -0800 Subject: [PATCH 35/49] `fn dav1d_ipred_*_neon`: Move `extern` declarations to `src/ipred.rs`. 
--- src/ipred.rs | 206 +++++++++++++++++++++++++++++++++++++++++++ src/ipred_tmpl_16.rs | 104 +++------------------- src/ipred_tmpl_8.rs | 97 +++----------------- 3 files changed, 229 insertions(+), 178 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 8cfb84cb3..368a15ba6 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -19,6 +19,12 @@ use std::ffi::c_ulonglong; use std::ffi::c_void; use std::slice; +#[cfg(feature = "bitdepth_8")] +use crate::include::common::bitdepth::BitDepth8; + +#[cfg(feature = "bitdepth_16")] +use crate::include::common::bitdepth::BitDepth16; + pub type angular_ipred_fn = unsafe extern "C" fn( *mut DynPixel, ptrdiff_t, @@ -168,6 +174,206 @@ extern "C" { decl_fns!(pal_pred, pal_pred); } +// TODO(kkysen) Temporarily pub until mod is deduplicated +#[cfg(all(feature = "bitdepth_8", feature = "asm", target_arch = "aarch64"))] +extern "C" { + pub(crate) fn dav1d_ipred_z1_fill2_8bpc_neon( + dst: *mut ::Pixel, + stride: ptrdiff_t, + top: *const ::Pixel, + width: c_int, + height: c_int, + dx: c_int, + max_base_x: c_int, + ); + pub(crate) fn dav1d_ipred_z1_fill1_8bpc_neon( + dst: *mut ::Pixel, + stride: ptrdiff_t, + top: *const ::Pixel, + width: c_int, + height: c_int, + dx: c_int, + max_base_x: c_int, + ); + pub(crate) fn dav1d_ipred_z1_upsample_edge_8bpc_neon( + out: *mut ::Pixel, + hsz: c_int, + in_0: *const ::Pixel, + end: c_int, + ); + pub(crate) fn dav1d_ipred_z1_filter_edge_8bpc_neon( + out: *mut ::Pixel, + sz: c_int, + in_0: *const ::Pixel, + end: c_int, + strength: c_int, + ); + pub(crate) fn dav1d_ipred_z2_fill3_8bpc_neon( + dst: *mut ::Pixel, + stride: ptrdiff_t, + top: *const ::Pixel, + left: *const ::Pixel, + width: c_int, + height: c_int, + dx: c_int, + dy: c_int, + ); + pub(crate) fn dav1d_ipred_z2_fill2_8bpc_neon( + dst: *mut ::Pixel, + stride: ptrdiff_t, + top: *const ::Pixel, + left: *const ::Pixel, + width: c_int, + height: c_int, + dx: c_int, + dy: c_int, + ); + pub(crate) fn dav1d_ipred_z2_fill1_8bpc_neon( + dst: *mut 
::Pixel, + stride: ptrdiff_t, + top: *const ::Pixel, + left: *const ::Pixel, + width: c_int, + height: c_int, + dx: c_int, + dy: c_int, + ); + pub(crate) fn dav1d_ipred_z2_upsample_edge_8bpc_neon( + out: *mut ::Pixel, + hsz: c_int, + in_0: *const ::Pixel, + ); + pub(crate) fn dav1d_ipred_reverse_8bpc_neon( + dst: *mut ::Pixel, + src: *const ::Pixel, + n: c_int, + ); + pub(crate) fn dav1d_ipred_z3_fill2_8bpc_neon( + dst: *mut ::Pixel, + stride: ptrdiff_t, + left: *const ::Pixel, + width: c_int, + height: c_int, + dy: c_int, + max_base_y: c_int, + ); + pub(crate) fn dav1d_ipred_z3_fill1_8bpc_neon( + dst: *mut ::Pixel, + stride: ptrdiff_t, + left: *const ::Pixel, + width: c_int, + height: c_int, + dy: c_int, + max_base_y: c_int, + ); + pub(crate) fn dav1d_ipred_pixel_set_8bpc_neon( + out: *mut ::Pixel, + px: ::Pixel, + n: c_int, + ); +} + +// TODO(kkysen) Temporarily pub until mod is deduplicated +#[cfg(all(feature = "bitdepth_16", feature = "asm", target_arch = "aarch64"))] +extern "C" { + pub(crate) fn dav1d_ipred_z1_fill2_16bpc_neon( + dst: *mut ::Pixel, + stride: ptrdiff_t, + top: *const ::Pixel, + width: c_int, + height: c_int, + dx: c_int, + max_base_x: c_int, + ); + pub(crate) fn dav1d_ipred_z1_fill1_16bpc_neon( + dst: *mut ::Pixel, + stride: ptrdiff_t, + top: *const ::Pixel, + width: c_int, + height: c_int, + dx: c_int, + max_base_x: c_int, + ); + pub(crate) fn dav1d_ipred_z1_upsample_edge_16bpc_neon( + out: *mut ::Pixel, + hsz: c_int, + in_0: *const ::Pixel, + end: c_int, + bitdepth_max: c_int, + ); + pub(crate) fn dav1d_ipred_z1_filter_edge_16bpc_neon( + out: *mut ::Pixel, + sz: c_int, + in_0: *const ::Pixel, + end: c_int, + strength: c_int, + ); + pub(crate) fn dav1d_ipred_z2_fill3_16bpc_neon( + dst: *mut ::Pixel, + stride: ptrdiff_t, + top: *const ::Pixel, + left: *const ::Pixel, + width: c_int, + height: c_int, + dx: c_int, + dy: c_int, + ); + pub(crate) fn dav1d_ipred_z2_fill2_16bpc_neon( + dst: *mut ::Pixel, + stride: ptrdiff_t, + top: *const ::Pixel, + 
left: *const ::Pixel, + width: c_int, + height: c_int, + dx: c_int, + dy: c_int, + ); + pub(crate) fn dav1d_ipred_z2_fill1_16bpc_neon( + dst: *mut ::Pixel, + stride: ptrdiff_t, + top: *const ::Pixel, + left: *const ::Pixel, + width: c_int, + height: c_int, + dx: c_int, + dy: c_int, + ); + pub(crate) fn dav1d_ipred_z2_upsample_edge_16bpc_neon( + out: *mut ::Pixel, + hsz: c_int, + in_0: *const ::Pixel, + bitdepth_max: c_int, + ); + pub(crate) fn dav1d_ipred_reverse_16bpc_neon( + dst: *mut ::Pixel, + src: *const ::Pixel, + n: c_int, + ); + pub(crate) fn dav1d_ipred_z3_fill2_16bpc_neon( + dst: *mut ::Pixel, + stride: ptrdiff_t, + left: *const ::Pixel, + width: c_int, + height: c_int, + dy: c_int, + max_base_y: c_int, + ); + pub(crate) fn dav1d_ipred_z3_fill1_16bpc_neon( + dst: *mut ::Pixel, + stride: ptrdiff_t, + left: *const ::Pixel, + width: c_int, + height: c_int, + dy: c_int, + max_base_y: c_int, + ); + pub(crate) fn dav1d_ipred_pixel_set_16bpc_neon( + out: *mut ::Pixel, + px: ::Pixel, + n: c_int, + ); +} + #[inline(never)] unsafe fn splat_dc( mut dst: *mut BD::Pixel, diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index fa1bd205a..321e0dd79 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -42,6 +42,18 @@ use libc::ptrdiff_t; use std::ffi::c_int; use std::ffi::c_void; +#[cfg(all(feature = "asm", target_arch = "aarch64"))] +use crate::{ + src::ipred::dav1d_ipred_pixel_set_16bpc_neon, src::ipred::dav1d_ipred_reverse_16bpc_neon, + src::ipred::dav1d_ipred_z1_fill1_16bpc_neon, src::ipred::dav1d_ipred_z1_fill2_16bpc_neon, + src::ipred::dav1d_ipred_z1_filter_edge_16bpc_neon, + src::ipred::dav1d_ipred_z1_upsample_edge_16bpc_neon, + src::ipred::dav1d_ipred_z2_fill1_16bpc_neon, src::ipred::dav1d_ipred_z2_fill2_16bpc_neon, + src::ipred::dav1d_ipred_z2_fill3_16bpc_neon, + src::ipred::dav1d_ipred_z2_upsample_edge_16bpc_neon, + src::ipred::dav1d_ipred_z3_fill1_16bpc_neon, src::ipred::dav1d_ipred_z3_fill2_16bpc_neon, +}; + #[cfg(feature = "asm")] use 
crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; @@ -57,98 +69,6 @@ use crate::{ src::tables::dav1d_dr_intra_derivative, }; -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -extern "C" { - fn dav1d_ipred_z1_fill2_16bpc_neon( - dst: *mut pixel, - stride: ptrdiff_t, - top: *const pixel, - width: c_int, - height: c_int, - dx: c_int, - max_base_x: c_int, - ); - fn dav1d_ipred_z1_fill1_16bpc_neon( - dst: *mut pixel, - stride: ptrdiff_t, - top: *const pixel, - width: c_int, - height: c_int, - dx: c_int, - max_base_x: c_int, - ); - fn dav1d_ipred_z1_upsample_edge_16bpc_neon( - out: *mut pixel, - hsz: c_int, - in_0: *const pixel, - end: c_int, - bitdepth_max: c_int, - ); - fn dav1d_ipred_z1_filter_edge_16bpc_neon( - out: *mut pixel, - sz: c_int, - in_0: *const pixel, - end: c_int, - strength: c_int, - ); - fn dav1d_ipred_z2_fill3_16bpc_neon( - dst: *mut pixel, - stride: ptrdiff_t, - top: *const pixel, - left: *const pixel, - width: c_int, - height: c_int, - dx: c_int, - dy: c_int, - ); - fn dav1d_ipred_z2_fill2_16bpc_neon( - dst: *mut pixel, - stride: ptrdiff_t, - top: *const pixel, - left: *const pixel, - width: c_int, - height: c_int, - dx: c_int, - dy: c_int, - ); - fn dav1d_ipred_z2_fill1_16bpc_neon( - dst: *mut pixel, - stride: ptrdiff_t, - top: *const pixel, - left: *const pixel, - width: c_int, - height: c_int, - dx: c_int, - dy: c_int, - ); - fn dav1d_ipred_z2_upsample_edge_16bpc_neon( - out: *mut pixel, - hsz: c_int, - in_0: *const pixel, - bitdepth_max: c_int, - ); - fn dav1d_ipred_reverse_16bpc_neon(dst: *mut pixel, src: *const pixel, n: c_int); - fn dav1d_ipred_z3_fill2_16bpc_neon( - dst: *mut pixel, - stride: ptrdiff_t, - left: *const pixel, - width: c_int, - height: c_int, - dy: c_int, - max_base_y: c_int, - ); - fn dav1d_ipred_z3_fill1_16bpc_neon( - dst: *mut pixel, - stride: ptrdiff_t, - left: *const pixel, - width: c_int, - height: c_int, - dy: c_int, - max_base_y: c_int, - ); - fn dav1d_ipred_pixel_set_16bpc_neon(out: *mut pixel, px: pixel, n: 
c_int); -} - pub type pixel = u16; unsafe extern "C" fn ipred_z1_c_erased( diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index c3290c784..721d5651d 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -42,6 +42,17 @@ use libc::ptrdiff_t; use std::ffi::c_int; use std::ffi::c_void; +#[cfg(all(feature = "asm", target_arch = "aarch64"))] +use crate::{ + src::ipred::dav1d_ipred_pixel_set_8bpc_neon, src::ipred::dav1d_ipred_reverse_8bpc_neon, + src::ipred::dav1d_ipred_z1_fill1_8bpc_neon, src::ipred::dav1d_ipred_z1_fill2_8bpc_neon, + src::ipred::dav1d_ipred_z1_filter_edge_8bpc_neon, + src::ipred::dav1d_ipred_z1_upsample_edge_8bpc_neon, src::ipred::dav1d_ipred_z2_fill1_8bpc_neon, + src::ipred::dav1d_ipred_z2_fill2_8bpc_neon, src::ipred::dav1d_ipred_z2_fill3_8bpc_neon, + src::ipred::dav1d_ipred_z2_upsample_edge_8bpc_neon, src::ipred::dav1d_ipred_z3_fill1_8bpc_neon, + src::ipred::dav1d_ipred_z3_fill2_8bpc_neon, +}; + #[cfg(feature = "asm")] use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; @@ -57,92 +68,6 @@ use crate::{ src::tables::dav1d_dr_intra_derivative, }; -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -extern "C" { - fn dav1d_ipred_z1_fill2_8bpc_neon( - dst: *mut pixel, - stride: ptrdiff_t, - top: *const pixel, - width: c_int, - height: c_int, - dx: c_int, - max_base_x: c_int, - ); - fn dav1d_ipred_z1_fill1_8bpc_neon( - dst: *mut pixel, - stride: ptrdiff_t, - top: *const pixel, - width: c_int, - height: c_int, - dx: c_int, - max_base_x: c_int, - ); - fn dav1d_ipred_z1_upsample_edge_8bpc_neon( - out: *mut pixel, - hsz: c_int, - in_0: *const pixel, - end: c_int, - ); - fn dav1d_ipred_z1_filter_edge_8bpc_neon( - out: *mut pixel, - sz: c_int, - in_0: *const pixel, - end: c_int, - strength: c_int, - ); - fn dav1d_ipred_z2_fill3_8bpc_neon( - dst: *mut pixel, - stride: ptrdiff_t, - top: *const pixel, - left: *const pixel, - width: c_int, - height: c_int, - dx: c_int, - dy: c_int, - ); - fn dav1d_ipred_z2_fill2_8bpc_neon( - dst: *mut pixel, - 
stride: ptrdiff_t, - top: *const pixel, - left: *const pixel, - width: c_int, - height: c_int, - dx: c_int, - dy: c_int, - ); - fn dav1d_ipred_z2_fill1_8bpc_neon( - dst: *mut pixel, - stride: ptrdiff_t, - top: *const pixel, - left: *const pixel, - width: c_int, - height: c_int, - dx: c_int, - dy: c_int, - ); - fn dav1d_ipred_z2_upsample_edge_8bpc_neon(out: *mut pixel, hsz: c_int, in_0: *const pixel); - fn dav1d_ipred_reverse_8bpc_neon(dst: *mut pixel, src: *const pixel, n: c_int); - fn dav1d_ipred_z3_fill2_8bpc_neon( - dst: *mut pixel, - stride: ptrdiff_t, - left: *const pixel, - width: c_int, - height: c_int, - dy: c_int, - max_base_y: c_int, - ); - fn dav1d_ipred_z3_fill1_8bpc_neon( - dst: *mut pixel, - stride: ptrdiff_t, - left: *const pixel, - width: c_int, - height: c_int, - dy: c_int, - max_base_y: c_int, - ); - fn dav1d_ipred_pixel_set_8bpc_neon(out: *mut pixel, px: pixel, n: c_int); -} - pub type pixel = u8; unsafe extern "C" fn ipred_z1_c_erased( From 39b2c81fa125790b308fb41e230d9fca60dddc75 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 17 Nov 2023 00:10:58 -0800 Subject: [PATCH 36/49] `fn dav1d_ipred_*_neon`: Type-erase `BitDepth` generics. One `fn` has a `const pixel px` arg, so we can't type erase it because it's not behind a ptr and so the two types are different sizes. It should be manageable since it's called in a Rust neon `fn`, not through a `fn` ptr, and so we can just do the dispatch manually without `bd_fn!` (which requires the two `fn` types to be unified). 
--- src/ipred.rs | 106 +++++++++++++++++++++---------------------- src/ipred_tmpl_16.rs | 100 +++++++++++++++++++++------------------- src/ipred_tmpl_8.rs | 100 +++++++++++++++++++++------------------- 3 files changed, 157 insertions(+), 149 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 368a15ba6..8cd13c338 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -178,96 +178,92 @@ extern "C" { #[cfg(all(feature = "bitdepth_8", feature = "asm", target_arch = "aarch64"))] extern "C" { pub(crate) fn dav1d_ipred_z1_fill2_8bpc_neon( - dst: *mut ::Pixel, + dst: *mut DynPixel, stride: ptrdiff_t, - top: *const ::Pixel, + top: *const DynPixel, width: c_int, height: c_int, dx: c_int, max_base_x: c_int, ); pub(crate) fn dav1d_ipred_z1_fill1_8bpc_neon( - dst: *mut ::Pixel, + dst: *mut DynPixel, stride: ptrdiff_t, - top: *const ::Pixel, + top: *const DynPixel, width: c_int, height: c_int, dx: c_int, max_base_x: c_int, ); pub(crate) fn dav1d_ipred_z1_upsample_edge_8bpc_neon( - out: *mut ::Pixel, + out: *mut DynPixel, hsz: c_int, - in_0: *const ::Pixel, + in_0: *const DynPixel, end: c_int, ); pub(crate) fn dav1d_ipred_z1_filter_edge_8bpc_neon( - out: *mut ::Pixel, + out: *mut DynPixel, sz: c_int, - in_0: *const ::Pixel, + in_0: *const DynPixel, end: c_int, strength: c_int, ); pub(crate) fn dav1d_ipred_z2_fill3_8bpc_neon( - dst: *mut ::Pixel, + dst: *mut DynPixel, stride: ptrdiff_t, - top: *const ::Pixel, - left: *const ::Pixel, + top: *const DynPixel, + left: *const DynPixel, width: c_int, height: c_int, dx: c_int, dy: c_int, ); pub(crate) fn dav1d_ipred_z2_fill2_8bpc_neon( - dst: *mut ::Pixel, + dst: *mut DynPixel, stride: ptrdiff_t, - top: *const ::Pixel, - left: *const ::Pixel, + top: *const DynPixel, + left: *const DynPixel, width: c_int, height: c_int, dx: c_int, dy: c_int, ); pub(crate) fn dav1d_ipred_z2_fill1_8bpc_neon( - dst: *mut ::Pixel, + dst: *mut DynPixel, stride: ptrdiff_t, - top: *const ::Pixel, - left: *const ::Pixel, + top: *const DynPixel, + left: *const 
DynPixel, width: c_int, height: c_int, dx: c_int, dy: c_int, ); pub(crate) fn dav1d_ipred_z2_upsample_edge_8bpc_neon( - out: *mut ::Pixel, + out: *mut DynPixel, hsz: c_int, - in_0: *const ::Pixel, - ); - pub(crate) fn dav1d_ipred_reverse_8bpc_neon( - dst: *mut ::Pixel, - src: *const ::Pixel, - n: c_int, + in_0: *const DynPixel, ); + pub(crate) fn dav1d_ipred_reverse_8bpc_neon(dst: *mut DynPixel, src: *const DynPixel, n: c_int); pub(crate) fn dav1d_ipred_z3_fill2_8bpc_neon( - dst: *mut ::Pixel, + dst: *mut DynPixel, stride: ptrdiff_t, - left: *const ::Pixel, + left: *const DynPixel, width: c_int, height: c_int, dy: c_int, max_base_y: c_int, ); pub(crate) fn dav1d_ipred_z3_fill1_8bpc_neon( - dst: *mut ::Pixel, + dst: *mut DynPixel, stride: ptrdiff_t, - left: *const ::Pixel, + left: *const DynPixel, width: c_int, height: c_int, dy: c_int, max_base_y: c_int, ); pub(crate) fn dav1d_ipred_pixel_set_8bpc_neon( - out: *mut ::Pixel, + out: *mut DynPixel, px: ::Pixel, n: c_int, ); @@ -277,98 +273,98 @@ extern "C" { #[cfg(all(feature = "bitdepth_16", feature = "asm", target_arch = "aarch64"))] extern "C" { pub(crate) fn dav1d_ipred_z1_fill2_16bpc_neon( - dst: *mut ::Pixel, + dst: *mut DynPixel, stride: ptrdiff_t, - top: *const ::Pixel, + top: *const DynPixel, width: c_int, height: c_int, dx: c_int, max_base_x: c_int, ); pub(crate) fn dav1d_ipred_z1_fill1_16bpc_neon( - dst: *mut ::Pixel, + dst: *mut DynPixel, stride: ptrdiff_t, - top: *const ::Pixel, + top: *const DynPixel, width: c_int, height: c_int, dx: c_int, max_base_x: c_int, ); pub(crate) fn dav1d_ipred_z1_upsample_edge_16bpc_neon( - out: *mut ::Pixel, + out: *mut DynPixel, hsz: c_int, - in_0: *const ::Pixel, + in_0: *const DynPixel, end: c_int, bitdepth_max: c_int, ); pub(crate) fn dav1d_ipred_z1_filter_edge_16bpc_neon( - out: *mut ::Pixel, + out: *mut DynPixel, sz: c_int, - in_0: *const ::Pixel, + in_0: *const DynPixel, end: c_int, strength: c_int, ); pub(crate) fn dav1d_ipred_z2_fill3_16bpc_neon( - dst: *mut ::Pixel, 
+ dst: *mut DynPixel, stride: ptrdiff_t, - top: *const ::Pixel, - left: *const ::Pixel, + top: *const DynPixel, + left: *const DynPixel, width: c_int, height: c_int, dx: c_int, dy: c_int, ); pub(crate) fn dav1d_ipred_z2_fill2_16bpc_neon( - dst: *mut ::Pixel, + dst: *mut DynPixel, stride: ptrdiff_t, - top: *const ::Pixel, - left: *const ::Pixel, + top: *const DynPixel, + left: *const DynPixel, width: c_int, height: c_int, dx: c_int, dy: c_int, ); pub(crate) fn dav1d_ipred_z2_fill1_16bpc_neon( - dst: *mut ::Pixel, + dst: *mut DynPixel, stride: ptrdiff_t, - top: *const ::Pixel, - left: *const ::Pixel, + top: *const DynPixel, + left: *const DynPixel, width: c_int, height: c_int, dx: c_int, dy: c_int, ); pub(crate) fn dav1d_ipred_z2_upsample_edge_16bpc_neon( - out: *mut ::Pixel, + out: *mut DynPixel, hsz: c_int, - in_0: *const ::Pixel, + in_0: *const DynPixel, bitdepth_max: c_int, ); pub(crate) fn dav1d_ipred_reverse_16bpc_neon( - dst: *mut ::Pixel, - src: *const ::Pixel, + dst: *mut DynPixel, + src: *const DynPixel, n: c_int, ); pub(crate) fn dav1d_ipred_z3_fill2_16bpc_neon( - dst: *mut ::Pixel, + dst: *mut DynPixel, stride: ptrdiff_t, - left: *const ::Pixel, + left: *const DynPixel, width: c_int, height: c_int, dy: c_int, max_base_y: c_int, ); pub(crate) fn dav1d_ipred_z3_fill1_16bpc_neon( - dst: *mut ::Pixel, + dst: *mut DynPixel, stride: ptrdiff_t, - left: *const ::Pixel, + left: *const DynPixel, width: c_int, height: c_int, dy: c_int, max_base_y: c_int, ); pub(crate) fn dav1d_ipred_pixel_set_16bpc_neon( - out: *mut ::Pixel, + out: *mut DynPixel, px: ::Pixel, n: c_int, ); diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 321e0dd79..548af200a 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -423,14 +423,14 @@ unsafe fn ipred_z3_neon( if upsample_left != 0 { flipped[0] = *topleft_in.offset(0); dav1d_ipred_reverse_16bpc_neon( - &mut *flipped.as_mut_ptr().offset(1), - &*topleft_in.offset(0), + flipped.as_mut_ptr().offset(1).cast(), + 
topleft_in.offset(0).cast(), height + cmp::max(width, height), ); dav1d_ipred_z1_upsample_edge_16bpc_neon( - left_out.as_mut_ptr(), + left_out.as_mut_ptr().cast(), width + height, - flipped.as_mut_ptr(), + flipped.as_mut_ptr().cast(), height + cmp::min(width, height), bitdepth_max, ); @@ -445,22 +445,22 @@ unsafe fn ipred_z3_neon( if filter_strength != 0 { flipped[0] = *topleft_in.offset(0); dav1d_ipred_reverse_16bpc_neon( - &mut *flipped.as_mut_ptr().offset(1), - &*topleft_in.offset(0), + flipped.as_mut_ptr().offset(1).cast(), + topleft_in.offset(0).cast(), height + cmp::max(width, height), ); dav1d_ipred_z1_filter_edge_16bpc_neon( - left_out.as_mut_ptr(), + left_out.as_mut_ptr().cast(), width + height, - flipped.as_mut_ptr(), + flipped.as_mut_ptr().cast(), height + cmp::min(width, height), filter_strength, ); max_base_y = width + height - 1; } else { dav1d_ipred_reverse_16bpc_neon( - left_out.as_mut_ptr(), - &*topleft_in.offset(0), + left_out.as_mut_ptr().cast(), + topleft_in.offset(0).cast(), height + cmp::min(width, height), ); max_base_y = height + cmp::min(width, height) - 1; @@ -469,15 +469,18 @@ unsafe fn ipred_z3_neon( let base_inc = 1 + upsample_left; let pad_pixels = cmp::max(64 - max_base_y - 1, height + 15); dav1d_ipred_pixel_set_16bpc_neon( - &mut *left_out.as_mut_ptr().offset((max_base_y + 1) as isize) as *mut pixel, + left_out + .as_mut_ptr() + .offset((max_base_y + 1) as isize) + .cast(), left_out[max_base_y as usize], (pad_pixels * base_inc) as c_int, ); if upsample_left != 0 { dav1d_ipred_z3_fill2_16bpc_neon( - dst, + dst.cast(), stride, - left_out.as_mut_ptr(), + left_out.as_mut_ptr().cast(), width, height, dy, @@ -485,9 +488,9 @@ unsafe fn ipred_z3_neon( ); } else { dav1d_ipred_z3_fill1_16bpc_neon( - dst, + dst.cast(), stride, - left_out.as_mut_ptr(), + left_out.as_mut_ptr().cast(), width, height, dy, @@ -562,9 +565,9 @@ unsafe fn ipred_z2_neon( if upsample_above != 0 { dav1d_ipred_z2_upsample_edge_16bpc_neon( - 
buf.as_mut_ptr().offset(top_offset), + buf.as_mut_ptr().offset(top_offset).cast(), width, - topleft_in, + topleft_in.cast(), bitdepth_max, ); dx <<= 1; @@ -577,9 +580,9 @@ unsafe fn ipred_z2_neon( if filter_strength != 0 { dav1d_ipred_z1_filter_edge_16bpc_neon( - buf.as_mut_ptr().offset(1 + top_offset), + buf.as_mut_ptr().offset(1 + top_offset).cast(), cmp::min(max_width, width), - topleft_in, + topleft_in.cast(), width, filter_strength, ); @@ -603,14 +606,14 @@ unsafe fn ipred_z2_neon( if upsample_left != 0 { buf[flipped_offset as usize] = *topleft_in; dav1d_ipred_reverse_16bpc_neon( - &mut *buf.as_mut_ptr().offset(1 + flipped_offset), - topleft_in, + buf.as_mut_ptr().offset(1 + flipped_offset).cast(), + topleft_in.cast(), height, ); dav1d_ipred_z2_upsample_edge_16bpc_neon( - buf.as_mut_ptr().offset(left_offset), + buf.as_mut_ptr().offset(left_offset).cast(), height, - buf.as_ptr().offset(flipped_offset), + buf.as_ptr().offset(flipped_offset).cast(), bitdepth_max, ); dy <<= 1; @@ -623,14 +626,14 @@ unsafe fn ipred_z2_neon( if filter_strength != 0 { buf[flipped_offset as usize] = *topleft_in; dav1d_ipred_reverse_16bpc_neon( - &mut *buf.as_mut_ptr().offset(1 + flipped_offset), - topleft_in, + buf.as_mut_ptr().offset(1 + flipped_offset).cast(), + topleft_in.cast(), height, ); dav1d_ipred_z1_filter_edge_16bpc_neon( - buf.as_mut_ptr().offset(1 + left_offset), + buf.as_mut_ptr().offset(1 + left_offset).cast(), cmp::min(max_height, height), - buf.as_ptr().offset(flipped_offset), + buf.as_ptr().offset(flipped_offset).cast(), height, filter_strength, ); @@ -647,8 +650,8 @@ unsafe fn ipred_z2_neon( } } else { dav1d_ipred_reverse_16bpc_neon( - buf.as_mut_ptr().offset(left_offset + 1), - topleft_in, + buf.as_mut_ptr().offset(left_offset + 1).cast(), + topleft_in.cast(), height, ); } @@ -662,10 +665,10 @@ unsafe fn ipred_z2_neon( if upsample_above == 0 && upsample_left == 0 { dav1d_ipred_z2_fill1_16bpc_neon( - dst, + dst.cast(), stride, - buf.as_ptr().offset(top_offset), - 
buf.as_ptr().offset(left_offset), + buf.as_ptr().offset(top_offset).cast(), + buf.as_ptr().offset(left_offset).cast(), width, height, dx, @@ -673,10 +676,10 @@ unsafe fn ipred_z2_neon( ); } else if upsample_above != 0 { dav1d_ipred_z2_fill2_16bpc_neon( - dst, + dst.cast(), stride, - buf.as_ptr().offset(top_offset), - buf.as_ptr().offset(left_offset), + buf.as_ptr().offset(top_offset).cast(), + buf.as_ptr().offset(left_offset).cast(), width, height, dx, @@ -684,10 +687,10 @@ unsafe fn ipred_z2_neon( ); } else { dav1d_ipred_z2_fill3_16bpc_neon( - dst, + dst.cast(), stride, - buf.as_ptr().offset(top_offset), - buf.as_ptr().offset(left_offset), + buf.as_ptr().offset(top_offset).cast(), + buf.as_ptr().offset(left_offset).cast(), width, height, dx, @@ -747,9 +750,9 @@ unsafe fn ipred_z1_neon( }; if upsample_above != 0 { dav1d_ipred_z1_upsample_edge_16bpc_neon( - top_out.as_mut_ptr(), + top_out.as_mut_ptr().cast(), width + height, - topleft_in, + topleft_in.cast(), width + cmp::min(width, height), bitdepth_max, ); @@ -763,9 +766,9 @@ unsafe fn ipred_z1_neon( }; if filter_strength != 0 { dav1d_ipred_z1_filter_edge_16bpc_neon( - top_out.as_mut_ptr(), + top_out.as_mut_ptr().cast(), width + height, - topleft_in, + topleft_in.cast(), width + cmp::min(width, height), filter_strength, ); @@ -782,15 +785,18 @@ unsafe fn ipred_z1_neon( let base_inc = 1 + upsample_above; let pad_pixels = width + 15; dav1d_ipred_pixel_set_16bpc_neon( - &mut *top_out.as_mut_ptr().offset((max_base_x + 1) as isize) as *mut pixel, + top_out + .as_mut_ptr() + .offset((max_base_x + 1) as isize) + .cast(), top_out[max_base_x as usize], (pad_pixels * base_inc) as c_int, ); if upsample_above != 0 { dav1d_ipred_z1_fill2_16bpc_neon( - dst, + dst.cast(), stride, - top_out.as_mut_ptr(), + top_out.as_mut_ptr().cast(), width, height, dx, @@ -798,9 +804,9 @@ unsafe fn ipred_z1_neon( ); } else { dav1d_ipred_z1_fill1_16bpc_neon( - dst, + dst.cast(), stride, - top_out.as_mut_ptr(), + top_out.as_mut_ptr().cast(), 
width, height, dx, diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 721d5651d..8fce0e53e 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -427,14 +427,14 @@ unsafe fn ipred_z3_neon( if upsample_left != 0 { flipped[0] = *topleft_in.offset(0); dav1d_ipred_reverse_8bpc_neon( - &mut *flipped.as_mut_ptr().offset(1), - &*topleft_in.offset(0), + flipped.as_mut_ptr().offset(1).cast(), + &*topleft_in.offset(0).cast(), height + cmp::max(width, height), ); dav1d_ipred_z1_upsample_edge_8bpc_neon( - left_out.as_mut_ptr(), + left_out.as_mut_ptr().cast(), width + height, - flipped.as_mut_ptr(), + flipped.as_mut_ptr().cast(), height + cmp::min(width, height), ); max_base_y = 2 * (width + height) - 2; @@ -448,22 +448,22 @@ unsafe fn ipred_z3_neon( if filter_strength != 0 { flipped[0] = *topleft_in.offset(0); dav1d_ipred_reverse_8bpc_neon( - &mut *flipped.as_mut_ptr().offset(1), - &*topleft_in.offset(0), + flipped.as_mut_ptr().offset(1).cast(), + topleft_in.offset(0).cast(), height + cmp::max(width, height), ); dav1d_ipred_z1_filter_edge_8bpc_neon( - left_out.as_mut_ptr(), + left_out.as_mut_ptr().cast(), width + height, - flipped.as_mut_ptr(), + flipped.as_mut_ptr().cast(), height + cmp::min(width, height), filter_strength, ); max_base_y = width + height - 1; } else { dav1d_ipred_reverse_8bpc_neon( - left_out.as_mut_ptr(), - &*topleft_in.offset(0), + left_out.as_mut_ptr().cast(), + topleft_in.offset(0).cast(), height + cmp::min(width, height), ); max_base_y = height + cmp::min(width, height) - 1; @@ -472,15 +472,18 @@ unsafe fn ipred_z3_neon( let base_inc = 1 + upsample_left; let pad_pixels = cmp::max(64 - max_base_y - 1, height + 15); dav1d_ipred_pixel_set_8bpc_neon( - &mut *left_out.as_mut_ptr().offset((max_base_y + 1) as isize) as *mut pixel, + left_out + .as_mut_ptr() + .offset((max_base_y + 1) as isize) + .cast(), left_out[max_base_y as usize], (pad_pixels * base_inc) as c_int, ); if upsample_left != 0 { dav1d_ipred_z3_fill2_8bpc_neon( - dst, + 
dst.cast(), stride, - left_out.as_mut_ptr(), + left_out.as_mut_ptr().cast(), width, height, dy, @@ -488,9 +491,9 @@ unsafe fn ipred_z3_neon( ); } else { dav1d_ipred_z3_fill1_8bpc_neon( - dst, + dst.cast(), stride, - left_out.as_mut_ptr(), + left_out.as_mut_ptr().cast(), width, height, dy, @@ -562,9 +565,9 @@ unsafe fn ipred_z2_neon( if upsample_above != 0 { dav1d_ipred_z2_upsample_edge_8bpc_neon( - buf.as_mut_ptr().offset(top_offset), + buf.as_mut_ptr().offset(top_offset).cast(), width, - topleft_in, + topleft_in.cast(), ); dx <<= 1; } else { @@ -576,9 +579,9 @@ unsafe fn ipred_z2_neon( if filter_strength != 0 { dav1d_ipred_z1_filter_edge_8bpc_neon( - buf.as_mut_ptr().offset(1 + top_offset), + buf.as_mut_ptr().offset(1 + top_offset).cast(), cmp::min(max_width, width), - topleft_in, + topleft_in.cast(), width, filter_strength, ); @@ -602,14 +605,14 @@ unsafe fn ipred_z2_neon( if upsample_left != 0 { buf[flipped_offset as usize] = *topleft_in; dav1d_ipred_reverse_8bpc_neon( - &mut *buf.as_mut_ptr().offset(1 + flipped_offset), - topleft_in, + buf.as_mut_ptr().offset(1 + flipped_offset).cast(), + topleft_in.cast(), height, ); dav1d_ipred_z2_upsample_edge_8bpc_neon( - buf.as_mut_ptr().offset(left_offset), + buf.as_mut_ptr().offset(left_offset).cast(), height, - buf.as_ptr().offset(flipped_offset), + buf.as_ptr().offset(flipped_offset).cast(), ); dy <<= 1; } else { @@ -621,14 +624,14 @@ unsafe fn ipred_z2_neon( if filter_strength != 0 { buf[flipped_offset as usize] = *topleft_in; dav1d_ipred_reverse_8bpc_neon( - &mut *buf.as_mut_ptr().offset(1 + flipped_offset), - topleft_in, + buf.as_mut_ptr().offset(1 + flipped_offset).cast(), + topleft_in.cast(), height, ); dav1d_ipred_z1_filter_edge_8bpc_neon( - buf.as_mut_ptr().offset(1 + left_offset), + buf.as_mut_ptr().offset(1 + left_offset).cast(), cmp::min(max_height, height), - buf.as_ptr().offset(flipped_offset), + buf.as_ptr().offset(flipped_offset).cast(), height, filter_strength, ); @@ -645,8 +648,8 @@ unsafe fn 
ipred_z2_neon( } } else { dav1d_ipred_reverse_8bpc_neon( - buf.as_mut_ptr().offset(left_offset + 1), - topleft_in, + buf.as_mut_ptr().offset(left_offset + 1).cast(), + topleft_in.cast(), height, ); } @@ -660,10 +663,10 @@ unsafe fn ipred_z2_neon( if upsample_above == 0 && upsample_left == 0 { dav1d_ipred_z2_fill1_8bpc_neon( - dst, + dst.cast(), stride, - buf.as_ptr().offset(top_offset), - buf.as_ptr().offset(left_offset), + buf.as_ptr().offset(top_offset).cast(), + buf.as_ptr().offset(left_offset).cast(), width, height, dx, @@ -671,10 +674,10 @@ unsafe fn ipred_z2_neon( ); } else if upsample_above != 0 { dav1d_ipred_z2_fill2_8bpc_neon( - dst, + dst.cast(), stride, - buf.as_ptr().offset(top_offset), - buf.as_ptr().offset(left_offset), + buf.as_ptr().offset(top_offset).cast(), + buf.as_ptr().offset(left_offset).cast(), width, height, dx, @@ -682,10 +685,10 @@ unsafe fn ipred_z2_neon( ); } else { dav1d_ipred_z2_fill3_8bpc_neon( - dst, + dst.cast(), stride, - buf.as_ptr().offset(top_offset), - buf.as_ptr().offset(left_offset), + buf.as_ptr().offset(top_offset).cast(), + buf.as_ptr().offset(left_offset).cast(), width, height, dx, @@ -743,9 +746,9 @@ unsafe fn ipred_z1_neon( }; if upsample_above != 0 { dav1d_ipred_z1_upsample_edge_8bpc_neon( - top_out.as_mut_ptr(), + top_out.as_mut_ptr().cast(), width + height, - topleft_in, + topleft_in.cast(), width + cmp::min(width, height), ); max_base_x = 2 * (width + height) - 2; @@ -758,9 +761,9 @@ unsafe fn ipred_z1_neon( }; if filter_strength != 0 { dav1d_ipred_z1_filter_edge_8bpc_neon( - top_out.as_mut_ptr(), + top_out.as_mut_ptr().cast(), width + height, - topleft_in, + topleft_in.cast(), width + cmp::min(width, height), filter_strength, ); @@ -777,15 +780,18 @@ unsafe fn ipred_z1_neon( let base_inc = 1 + upsample_above; let pad_pixels = width + 15; dav1d_ipred_pixel_set_8bpc_neon( - &mut *top_out.as_mut_ptr().offset((max_base_x + 1) as isize) as *mut pixel, + top_out + .as_mut_ptr() + .offset((max_base_x + 1) as isize) + 
.cast(), top_out[max_base_x as usize], (pad_pixels * base_inc) as c_int, ); if upsample_above != 0 { dav1d_ipred_z1_fill2_8bpc_neon( - dst, + dst.cast(), stride, - top_out.as_mut_ptr(), + top_out.as_mut_ptr().cast(), width, height, dx, @@ -793,9 +799,9 @@ unsafe fn ipred_z1_neon( ); } else { dav1d_ipred_z1_fill1_8bpc_neon( - dst, + dst.cast(), stride, - top_out.as_mut_ptr(), + top_out.as_mut_ptr().cast(), width, height, dx, From 29cc7b398f6f7c273fe705fd76b6b19bcb0bc1d0 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 17 Nov 2023 00:15:49 -0800 Subject: [PATCH 37/49] `fn dav1d_ipred_z{1,2}_upsample_edge_8bpc_neon`: Add `bitdepth_max: c_int` args to unify the signatures with the `16bpc` versions. --- src/ipred.rs | 2 ++ src/ipred_tmpl_8.rs | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/src/ipred.rs b/src/ipred.rs index 8cd13c338..0d3bc7f31 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -200,6 +200,7 @@ extern "C" { hsz: c_int, in_0: *const DynPixel, end: c_int, + _bitdepth_max: c_int, ); pub(crate) fn dav1d_ipred_z1_filter_edge_8bpc_neon( out: *mut DynPixel, @@ -242,6 +243,7 @@ extern "C" { out: *mut DynPixel, hsz: c_int, in_0: *const DynPixel, + _bitdepth_max: c_int, ); pub(crate) fn dav1d_ipred_reverse_8bpc_neon(dst: *mut DynPixel, src: *const DynPixel, n: c_int); pub(crate) fn dav1d_ipred_z3_fill2_8bpc_neon( diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 8fce0e53e..5c817cc6d 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -436,6 +436,7 @@ unsafe fn ipred_z3_neon( width + height, flipped.as_mut_ptr().cast(), height + cmp::min(width, height), + 8, ); max_base_y = 2 * (width + height) - 2; dy <<= 1; @@ -568,6 +569,7 @@ unsafe fn ipred_z2_neon( buf.as_mut_ptr().offset(top_offset).cast(), width, topleft_in.cast(), + 8, ); dx <<= 1; } else { @@ -613,6 +615,7 @@ unsafe fn ipred_z2_neon( buf.as_mut_ptr().offset(left_offset).cast(), height, buf.as_ptr().offset(flipped_offset).cast(), + 8, ); dy <<= 1; } else { @@ -750,6 +753,7 @@ 
unsafe fn ipred_z1_neon( width + height, topleft_in.cast(), width + cmp::min(width, height), + 8, ); max_base_x = 2 * (width + height) - 2; dx <<= 1; From 285a32e350cd56d6d0e832932e8c80a0e22af159 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 17 Nov 2023 10:29:10 -0800 Subject: [PATCH 38/49] `fn ipred_z1_neon`: Deduplicate w/ generics. --- src/ipred.rs | 110 +++++++++++++++++++++++++++++++++++++++++++ src/ipred_tmpl_16.rs | 97 ++------------------------------------ src/ipred_tmpl_8.rs | 98 ++------------------------------------ 3 files changed, 117 insertions(+), 188 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 0d3bc7f31..c7d0ad318 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -25,6 +25,12 @@ use crate::include::common::bitdepth::BitDepth8; #[cfg(feature = "bitdepth_16")] use crate::include::common::bitdepth::BitDepth16; +#[cfg(all(feature = "asm", target_arch = "aarch64"))] +use crate::include::common::bitdepth::bd_fn; + +#[cfg(all(feature = "asm", target_arch = "aarch64"))] +use ::to_method::To; + pub type angular_ipred_fn = unsafe extern "C" fn( *mut DynPixel, ptrdiff_t, @@ -1678,3 +1684,107 @@ pub(crate) unsafe fn pal_pred_rust( y += 1; } } + +// TODO(kkysen) Temporarily pub until mod is deduplicated +#[cfg(all(feature = "asm", target_arch = "aarch64"))] +pub(crate) unsafe fn ipred_z1_neon( + dst: *mut BD::Pixel, + stride: ptrdiff_t, + topleft_in: *const BD::Pixel, + width: c_int, + height: c_int, + mut angle: c_int, + _max_width: c_int, + _max_height: c_int, + bd: BD, +) { + let is_sm = angle >> 9 & 0x1 as c_int; + let enable_intra_edge_filter = angle >> 10; + angle &= 511 as c_int; + let mut dx = dav1d_dr_intra_derivative[(angle >> 1) as usize] as c_int; + const top_out_size: usize = 64 + 64 * (64 + 15) * 2 + 16; + let mut top_out: [BD::Pixel; top_out_size] = [0.into(); top_out_size]; + let max_base_x; + let upsample_above = if enable_intra_edge_filter != 0 { + get_upsample(width + height, 90 - angle, is_sm) + } else { + 0 as c_int + }; 
+ if upsample_above != 0 { + bd_fn!(BD, ipred_z1_upsample_edge, neon)( + top_out.as_mut_ptr().cast(), + width + height, + topleft_in.cast(), + width + cmp::min(width, height), + bd.into_c(), + ); + max_base_x = 2 * (width + height) - 2; + dx <<= 1; + } else { + let filter_strength = if enable_intra_edge_filter != 0 { + get_filter_strength(width + height, 90 - angle, is_sm) + } else { + 0 as c_int + }; + if filter_strength != 0 { + bd_fn!(BD, ipred_z1_filter_edge, neon)( + top_out.as_mut_ptr().cast(), + width + height, + topleft_in.cast(), + width + cmp::min(width, height), + filter_strength, + ); + max_base_x = width + height - 1; + } else { + max_base_x = width + cmp::min(width, height) - 1; + memcpy( + top_out.as_mut_ptr() as *mut c_void, + &*topleft_in.offset(1) as *const BD::Pixel as *const c_void, + ((max_base_x + 1) as usize).wrapping_mul(::core::mem::size_of::()), + ); + } + } + let base_inc = 1 + upsample_above; + let pad_pixels = width + 15; + { + // `pixel_set` takes a `px: BD::Pixel`. + // Since it's not behind a ptr, we can't make it a `DynPixel` + // and call it uniformly with `bd_fn!`. + let out = top_out + .as_mut_ptr() + .offset((max_base_x + 1) as isize) + .cast(); + let px = top_out[max_base_x as usize]; + let n = (pad_pixels * base_inc) as c_int; + match BD::BPC { + BPC::BPC8 => dav1d_ipred_pixel_set_8bpc_neon( + out, + // Really a no-op cast, but it's difficult to do it properly with generics. 
+ px.to::() as ::Pixel, + n, + ), + BPC::BPC16 => dav1d_ipred_pixel_set_16bpc_neon(out, px.into(), n), + } + } + if upsample_above != 0 { + bd_fn!(BD, ipred_z1_fill2, neon)( + dst.cast(), + stride, + top_out.as_mut_ptr().cast(), + width, + height, + dx, + max_base_x, + ); + } else { + bd_fn!(BD, ipred_z1_fill1, neon)( + dst.cast(), + stride, + top_out.as_mut_ptr().cast(), + width, + height, + dx, + max_base_x, + ); + }; +} diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 548af200a..914ee6367 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -45,13 +45,13 @@ use std::ffi::c_void; #[cfg(all(feature = "asm", target_arch = "aarch64"))] use crate::{ src::ipred::dav1d_ipred_pixel_set_16bpc_neon, src::ipred::dav1d_ipred_reverse_16bpc_neon, - src::ipred::dav1d_ipred_z1_fill1_16bpc_neon, src::ipred::dav1d_ipred_z1_fill2_16bpc_neon, src::ipred::dav1d_ipred_z1_filter_edge_16bpc_neon, src::ipred::dav1d_ipred_z1_upsample_edge_16bpc_neon, src::ipred::dav1d_ipred_z2_fill1_16bpc_neon, src::ipred::dav1d_ipred_z2_fill2_16bpc_neon, src::ipred::dav1d_ipred_z2_fill3_16bpc_neon, src::ipred::dav1d_ipred_z2_upsample_edge_16bpc_neon, src::ipred::dav1d_ipred_z3_fill1_16bpc_neon, src::ipred::dav1d_ipred_z3_fill2_16bpc_neon, + src::ipred::ipred_z1_neon, }; #[cfg(feature = "asm")] @@ -711,7 +711,7 @@ unsafe extern "C" fn ipred_z1_neon_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_z1_neon( + ipred_z1_neon::( dst.cast(), stride, topleft_in.cast(), @@ -720,99 +720,8 @@ unsafe extern "C" fn ipred_z1_neon_erased( angle, max_width, max_height, - bitdepth_max, - ); -} - -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -unsafe fn ipred_z1_neon( - dst: *mut pixel, - stride: ptrdiff_t, - topleft_in: *const pixel, - width: c_int, - height: c_int, - mut angle: c_int, - _max_width: c_int, - _max_height: c_int, - bitdepth_max: c_int, -) { - let is_sm = angle >> 9 & 0x1 as c_int; - let enable_intra_edge_filter = angle >> 10; - angle &= 511 as c_int; - let mut 
dx = dav1d_dr_intra_derivative[(angle >> 1) as usize] as c_int; - const top_out_size: usize = 64 + 64 * (64 + 15) * 2 + 16; - let mut top_out: [pixel; top_out_size] = [0; top_out_size]; - let max_base_x; - let upsample_above = if enable_intra_edge_filter != 0 { - get_upsample(width + height, 90 - angle, is_sm) - } else { - 0 as c_int - }; - if upsample_above != 0 { - dav1d_ipred_z1_upsample_edge_16bpc_neon( - top_out.as_mut_ptr().cast(), - width + height, - topleft_in.cast(), - width + cmp::min(width, height), - bitdepth_max, - ); - max_base_x = 2 * (width + height) - 2; - dx <<= 1; - } else { - let filter_strength = if enable_intra_edge_filter != 0 { - get_filter_strength(width + height, 90 - angle, is_sm) - } else { - 0 as c_int - }; - if filter_strength != 0 { - dav1d_ipred_z1_filter_edge_16bpc_neon( - top_out.as_mut_ptr().cast(), - width + height, - topleft_in.cast(), - width + cmp::min(width, height), - filter_strength, - ); - max_base_x = width + height - 1; - } else { - max_base_x = width + cmp::min(width, height) - 1; - memcpy( - top_out.as_mut_ptr() as *mut c_void, - &*topleft_in.offset(1) as *const pixel as *const c_void, - ((max_base_x + 1) as usize).wrapping_mul(::core::mem::size_of::()), - ); - } - } - let base_inc = 1 + upsample_above; - let pad_pixels = width + 15; - dav1d_ipred_pixel_set_16bpc_neon( - top_out - .as_mut_ptr() - .offset((max_base_x + 1) as isize) - .cast(), - top_out[max_base_x as usize], - (pad_pixels * base_inc) as c_int, + BitDepth16::from_c(bitdepth_max), ); - if upsample_above != 0 { - dav1d_ipred_z1_fill2_16bpc_neon( - dst.cast(), - stride, - top_out.as_mut_ptr().cast(), - width, - height, - dx, - max_base_x, - ); - } else { - dav1d_ipred_z1_fill1_16bpc_neon( - dst.cast(), - stride, - top_out.as_mut_ptr().cast(), - width, - height, - dx, - max_base_x, - ); - }; } #[cold] diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 5c817cc6d..6bed603e7 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -45,12 +45,11 @@ 
use std::ffi::c_void; #[cfg(all(feature = "asm", target_arch = "aarch64"))] use crate::{ src::ipred::dav1d_ipred_pixel_set_8bpc_neon, src::ipred::dav1d_ipred_reverse_8bpc_neon, - src::ipred::dav1d_ipred_z1_fill1_8bpc_neon, src::ipred::dav1d_ipred_z1_fill2_8bpc_neon, src::ipred::dav1d_ipred_z1_filter_edge_8bpc_neon, src::ipred::dav1d_ipred_z1_upsample_edge_8bpc_neon, src::ipred::dav1d_ipred_z2_fill1_8bpc_neon, src::ipred::dav1d_ipred_z2_fill2_8bpc_neon, src::ipred::dav1d_ipred_z2_fill3_8bpc_neon, src::ipred::dav1d_ipred_z2_upsample_edge_8bpc_neon, src::ipred::dav1d_ipred_z3_fill1_8bpc_neon, - src::ipred::dav1d_ipred_z3_fill2_8bpc_neon, + src::ipred::dav1d_ipred_z3_fill2_8bpc_neon, src::ipred::ipred_z1_neon, }; #[cfg(feature = "asm")] @@ -710,9 +709,9 @@ unsafe extern "C" fn ipred_z1_neon_erased( angle: c_int, max_width: c_int, max_height: c_int, - _bitdepth_max: c_int, + bitdepth_max: c_int, ) { - ipred_z1_neon( + ipred_z1_neon::( dst.cast(), stride, topleft_in.cast(), @@ -721,99 +720,10 @@ unsafe extern "C" fn ipred_z1_neon_erased( angle, max_width, max_height, + BitDepth8::from_c(bitdepth_max), ); } -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -unsafe fn ipred_z1_neon( - dst: *mut pixel, - stride: ptrdiff_t, - topleft_in: *const pixel, - width: c_int, - height: c_int, - mut angle: c_int, - _max_width: c_int, - _max_height: c_int, -) { - let is_sm = angle >> 9 & 0x1 as c_int; - let enable_intra_edge_filter = angle >> 10; - angle &= 511 as c_int; - let mut dx = dav1d_dr_intra_derivative[(angle >> 1) as usize] as c_int; - const top_out_size: usize = 64 + 64 * (64 + 15) * 2 + 16; - let mut top_out: [pixel; top_out_size] = [0; top_out_size]; - let max_base_x; - let upsample_above = if enable_intra_edge_filter != 0 { - get_upsample(width + height, 90 - angle, is_sm) - } else { - 0 as c_int - }; - if upsample_above != 0 { - dav1d_ipred_z1_upsample_edge_8bpc_neon( - top_out.as_mut_ptr().cast(), - width + height, - topleft_in.cast(), - width + cmp::min(width, 
height), - 8, - ); - max_base_x = 2 * (width + height) - 2; - dx <<= 1; - } else { - let filter_strength = if enable_intra_edge_filter != 0 { - get_filter_strength(width + height, 90 - angle, is_sm) - } else { - 0 as c_int - }; - if filter_strength != 0 { - dav1d_ipred_z1_filter_edge_8bpc_neon( - top_out.as_mut_ptr().cast(), - width + height, - topleft_in.cast(), - width + cmp::min(width, height), - filter_strength, - ); - max_base_x = width + height - 1; - } else { - max_base_x = width + cmp::min(width, height) - 1; - memcpy( - top_out.as_mut_ptr() as *mut c_void, - &*topleft_in.offset(1) as *const pixel as *const c_void, - ((max_base_x + 1) as usize).wrapping_mul(::core::mem::size_of::()), - ); - } - } - let base_inc = 1 + upsample_above; - let pad_pixels = width + 15; - dav1d_ipred_pixel_set_8bpc_neon( - top_out - .as_mut_ptr() - .offset((max_base_x + 1) as isize) - .cast(), - top_out[max_base_x as usize], - (pad_pixels * base_inc) as c_int, - ); - if upsample_above != 0 { - dav1d_ipred_z1_fill2_8bpc_neon( - dst.cast(), - stride, - top_out.as_mut_ptr().cast(), - width, - height, - dx, - max_base_x, - ); - } else { - dav1d_ipred_z1_fill1_8bpc_neon( - dst.cast(), - stride, - top_out.as_mut_ptr().cast(), - width, - height, - dx, - max_base_x, - ); - }; -} - #[cold] pub unsafe fn rav1d_intra_pred_dsp_init_8bpc(c: *mut Rav1dIntraPredDSPContext) { (*c).intra_pred[DC_PRED as usize] = Some(ipred_dc_c_erased::); From 5b9b09c285499eea63d27054567975c0f03f39a2 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 17 Nov 2023 10:44:41 -0800 Subject: [PATCH 39/49] `fn ipred_z2_neon`: Deduplicate w/ generics. 
--- src/ipred.rs | 178 +++++++++++++++++++++++++++++++++++++++++ src/ipred_tmpl_16.rs | 184 +------------------------------------------ src/ipred_tmpl_8.rs | 181 +----------------------------------------- 3 files changed, 185 insertions(+), 358 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index c7d0ad318..53d8ebc5c 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -1788,3 +1788,181 @@ pub(crate) unsafe fn ipred_z1_neon( ); }; } + +// TODO(kkysen) Temporarily pub until mod is deduplicated +#[cfg(all(feature = "asm", target_arch = "aarch64"))] +pub(crate) unsafe fn ipred_z2_neon( + dst: *mut BD::Pixel, + stride: ptrdiff_t, + topleft_in: *const BD::Pixel, + width: c_int, + height: c_int, + mut angle: c_int, + max_width: c_int, + max_height: c_int, + bd: BD, +) { + let is_sm = angle >> 9 & 0x1 as c_int; + let enable_intra_edge_filter = angle >> 10; + angle &= 511 as c_int; + if !(angle > 90 && angle < 180) { + unreachable!(); + } + let mut dy = dav1d_dr_intra_derivative[((angle - 90) >> 1) as usize] as c_int; + let mut dx = dav1d_dr_intra_derivative[((180 - angle) >> 1) as usize] as c_int; + let mut buf: [BD::Pixel; 3 * (64 + 1)] = [0.into(); 3 * (64 + 1)]; // NOTE: C code doesn't initialize + + // The asm can underread below the start of top[] and left[]; to avoid + // surprising behaviour, make sure this is within the allocated stack space. 
+ let left_offset: isize = 2 * (64 + 1); + let top_offset: isize = 1 * (64 + 1); + let flipped_offset: isize = 0 * (64 + 1); + + let upsample_left = if enable_intra_edge_filter != 0 { + get_upsample(width + height, 180 - angle, is_sm) + } else { + 0 as c_int + }; + let upsample_above = if enable_intra_edge_filter != 0 { + get_upsample(width + height, angle - 90, is_sm) + } else { + 0 as c_int + }; + + if upsample_above != 0 { + bd_fn!(BD, ipred_z2_upsample_edge, neon)( + buf.as_mut_ptr().offset(top_offset).cast(), + width, + topleft_in.cast(), + bd.into_c(), + ); + dx <<= 1; + } else { + let filter_strength = if enable_intra_edge_filter != 0 { + get_filter_strength(width + height, angle - 90, is_sm) + } else { + 0 as c_int + }; + + if filter_strength != 0 { + bd_fn!(BD, ipred_z1_filter_edge, neon)( + buf.as_mut_ptr().offset(1 + top_offset).cast(), + cmp::min(max_width, width), + topleft_in.cast(), + width, + filter_strength, + ); + + if max_width < width { + memcpy( + buf.as_mut_ptr().offset(top_offset + 1 + max_width as isize) as *mut c_void, + topleft_in.offset(1 + max_width as isize) as *const c_void, + ((width - max_width) as usize) + .wrapping_mul(::core::mem::size_of::()), + ); + } + } else { + BD::pixel_copy( + &mut buf[1 + top_offset as usize..], + core::slice::from_raw_parts(topleft_in.offset(1), width as usize), + width as usize, + ); + } + } + + if upsample_left != 0 { + buf[flipped_offset as usize] = *topleft_in; + bd_fn!(BD, ipred_reverse, neon)( + buf.as_mut_ptr().offset(1 + flipped_offset).cast(), + topleft_in.cast(), + height, + ); + bd_fn!(BD, ipred_z2_upsample_edge, neon)( + buf.as_mut_ptr().offset(left_offset).cast(), + height, + buf.as_ptr().offset(flipped_offset).cast(), + bd.into_c(), + ); + dy <<= 1; + } else { + let filter_strength = if enable_intra_edge_filter != 0 { + get_filter_strength(width + height, 180 - angle, is_sm) + } else { + 0 as c_int + }; + if filter_strength != 0 { + buf[flipped_offset as usize] = *topleft_in; + bd_fn!(BD, 
ipred_reverse, neon)( + buf.as_mut_ptr().offset(1 + flipped_offset).cast(), + topleft_in.cast(), + height, + ); + bd_fn!(BD, ipred_z1_filter_edge, neon)( + buf.as_mut_ptr().offset(1 + left_offset).cast(), + cmp::min(max_height, height), + buf.as_ptr().offset(flipped_offset).cast(), + height, + filter_strength, + ); + if max_height < height { + memcpy( + buf.as_mut_ptr() + .offset(left_offset + 1 + max_height as isize) + as *mut c_void, + buf.as_mut_ptr() + .offset(flipped_offset + 1 + max_height as isize) + as *const c_void, + ((height - max_height) as usize) + .wrapping_mul(::core::mem::size_of::()), + ); + } + } else { + bd_fn!(BD, ipred_reverse, neon)( + buf.as_mut_ptr().offset(left_offset + 1).cast(), + topleft_in.cast(), + height, + ); + } + } + buf[top_offset as usize] = *topleft_in; + buf[left_offset as usize] = *topleft_in; + + if upsample_above != 0 && upsample_left != 0 { + unreachable!(); + } + + if upsample_above == 0 && upsample_left == 0 { + bd_fn!(BD, ipred_z2_fill1, neon)( + dst.cast(), + stride, + buf.as_ptr().offset(top_offset).cast(), + buf.as_ptr().offset(left_offset).cast(), + width, + height, + dx, + dy, + ); + } else if upsample_above != 0 { + bd_fn!(BD, ipred_z2_fill2, neon)( + dst.cast(), + stride, + buf.as_ptr().offset(top_offset).cast(), + buf.as_ptr().offset(left_offset).cast(), + width, + height, + dx, + dy, + ); + } else { + bd_fn!(BD, ipred_z2_fill3, neon)( + dst.cast(), + stride, + buf.as_ptr().offset(top_offset).cast(), + buf.as_ptr().offset(left_offset).cast(), + width, + height, + dx, + dy, + ); + }; +} diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 914ee6367..9baf73751 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -47,11 +47,8 @@ use crate::{ src::ipred::dav1d_ipred_pixel_set_16bpc_neon, src::ipred::dav1d_ipred_reverse_16bpc_neon, src::ipred::dav1d_ipred_z1_filter_edge_16bpc_neon, src::ipred::dav1d_ipred_z1_upsample_edge_16bpc_neon, - src::ipred::dav1d_ipred_z2_fill1_16bpc_neon, 
src::ipred::dav1d_ipred_z2_fill2_16bpc_neon, - src::ipred::dav1d_ipred_z2_fill3_16bpc_neon, - src::ipred::dav1d_ipred_z2_upsample_edge_16bpc_neon, src::ipred::dav1d_ipred_z3_fill1_16bpc_neon, src::ipred::dav1d_ipred_z3_fill2_16bpc_neon, - src::ipred::ipred_z1_neon, + src::ipred::ipred_z1_neon, src::ipred::ipred_z2_neon, }; #[cfg(feature = "asm")] @@ -511,7 +508,7 @@ unsafe extern "C" fn ipred_z2_neon_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_z2_neon( + ipred_z2_neon::( dst.cast(), stride, topleft_in.cast(), @@ -520,185 +517,10 @@ unsafe extern "C" fn ipred_z2_neon_erased( angle, max_width, max_height, - bitdepth_max, + BitDepth16::from_c(bitdepth_max), ); } -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -unsafe fn ipred_z2_neon( - dst: *mut pixel, - stride: ptrdiff_t, - topleft_in: *const pixel, - width: c_int, - height: c_int, - mut angle: c_int, - max_width: c_int, - max_height: c_int, - bitdepth_max: c_int, -) { - let is_sm = angle >> 9 & 0x1 as c_int; - let enable_intra_edge_filter = angle >> 10; - angle &= 511 as c_int; - if !(angle > 90 && angle < 180) { - unreachable!(); - } - let mut dy = dav1d_dr_intra_derivative[((angle - 90) >> 1) as usize] as c_int; - let mut dx = dav1d_dr_intra_derivative[((180 - angle) >> 1) as usize] as c_int; - let mut buf: [pixel; 3 * (64 + 1)] = [0; 3 * (64 + 1)]; // NOTE: C code doesn't initialize - - // The asm can underread below the start of top[] and left[]; to avoid - // surprising behaviour, make sure this is within the allocated stack space. 
- let left_offset: isize = 2 * (64 + 1); - let top_offset: isize = 1 * (64 + 1); - let flipped_offset: isize = 0 * (64 + 1); - - let upsample_left = if enable_intra_edge_filter != 0 { - get_upsample(width + height, 180 - angle, is_sm) - } else { - 0 as c_int - }; - let upsample_above = if enable_intra_edge_filter != 0 { - get_upsample(width + height, angle - 90, is_sm) - } else { - 0 as c_int - }; - - if upsample_above != 0 { - dav1d_ipred_z2_upsample_edge_16bpc_neon( - buf.as_mut_ptr().offset(top_offset).cast(), - width, - topleft_in.cast(), - bitdepth_max, - ); - dx <<= 1; - } else { - let filter_strength = if enable_intra_edge_filter != 0 { - get_filter_strength(width + height, angle - 90, is_sm) - } else { - 0 as c_int - }; - - if filter_strength != 0 { - dav1d_ipred_z1_filter_edge_16bpc_neon( - buf.as_mut_ptr().offset(1 + top_offset).cast(), - cmp::min(max_width, width), - topleft_in.cast(), - width, - filter_strength, - ); - - if max_width < width { - memcpy( - buf.as_mut_ptr().offset(top_offset + 1 + max_width as isize) as *mut c_void, - topleft_in.offset(1 + max_width as isize) as *const c_void, - ((width - max_width) as usize).wrapping_mul(::core::mem::size_of::()), - ); - } - } else { - BitDepth16::pixel_copy( - &mut buf[1 + top_offset as usize..], - core::slice::from_raw_parts(topleft_in.offset(1), width as usize), - width as usize, - ); - } - } - - if upsample_left != 0 { - buf[flipped_offset as usize] = *topleft_in; - dav1d_ipred_reverse_16bpc_neon( - buf.as_mut_ptr().offset(1 + flipped_offset).cast(), - topleft_in.cast(), - height, - ); - dav1d_ipred_z2_upsample_edge_16bpc_neon( - buf.as_mut_ptr().offset(left_offset).cast(), - height, - buf.as_ptr().offset(flipped_offset).cast(), - bitdepth_max, - ); - dy <<= 1; - } else { - let filter_strength = if enable_intra_edge_filter != 0 { - get_filter_strength(width + height, 180 - angle, is_sm) - } else { - 0 as c_int - }; - if filter_strength != 0 { - buf[flipped_offset as usize] = *topleft_in; - 
dav1d_ipred_reverse_16bpc_neon( - buf.as_mut_ptr().offset(1 + flipped_offset).cast(), - topleft_in.cast(), - height, - ); - dav1d_ipred_z1_filter_edge_16bpc_neon( - buf.as_mut_ptr().offset(1 + left_offset).cast(), - cmp::min(max_height, height), - buf.as_ptr().offset(flipped_offset).cast(), - height, - filter_strength, - ); - if max_height < height { - memcpy( - buf.as_mut_ptr() - .offset(left_offset + 1 + max_height as isize) - as *mut c_void, - buf.as_mut_ptr() - .offset(flipped_offset + 1 + max_height as isize) - as *const c_void, - ((height - max_height) as usize).wrapping_mul(::core::mem::size_of::()), - ); - } - } else { - dav1d_ipred_reverse_16bpc_neon( - buf.as_mut_ptr().offset(left_offset + 1).cast(), - topleft_in.cast(), - height, - ); - } - } - buf[top_offset as usize] = *topleft_in; - buf[left_offset as usize] = *topleft_in; - - if upsample_above != 0 && upsample_left != 0 { - unreachable!(); - } - - if upsample_above == 0 && upsample_left == 0 { - dav1d_ipred_z2_fill1_16bpc_neon( - dst.cast(), - stride, - buf.as_ptr().offset(top_offset).cast(), - buf.as_ptr().offset(left_offset).cast(), - width, - height, - dx, - dy, - ); - } else if upsample_above != 0 { - dav1d_ipred_z2_fill2_16bpc_neon( - dst.cast(), - stride, - buf.as_ptr().offset(top_offset).cast(), - buf.as_ptr().offset(left_offset).cast(), - width, - height, - dx, - dy, - ); - } else { - dav1d_ipred_z2_fill3_16bpc_neon( - dst.cast(), - stride, - buf.as_ptr().offset(top_offset).cast(), - buf.as_ptr().offset(left_offset).cast(), - width, - height, - dx, - dy, - ); - }; -} - #[cfg(all(feature = "asm", target_arch = "aarch64"))] unsafe extern "C" fn ipred_z1_neon_erased( dst: *mut DynPixel, diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 6bed603e7..fc4d988e6 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -46,10 +46,9 @@ use std::ffi::c_void; use crate::{ src::ipred::dav1d_ipred_pixel_set_8bpc_neon, src::ipred::dav1d_ipred_reverse_8bpc_neon, 
src::ipred::dav1d_ipred_z1_filter_edge_8bpc_neon, - src::ipred::dav1d_ipred_z1_upsample_edge_8bpc_neon, src::ipred::dav1d_ipred_z2_fill1_8bpc_neon, - src::ipred::dav1d_ipred_z2_fill2_8bpc_neon, src::ipred::dav1d_ipred_z2_fill3_8bpc_neon, - src::ipred::dav1d_ipred_z2_upsample_edge_8bpc_neon, src::ipred::dav1d_ipred_z3_fill1_8bpc_neon, + src::ipred::dav1d_ipred_z1_upsample_edge_8bpc_neon, src::ipred::dav1d_ipred_z3_fill1_8bpc_neon, src::ipred::dav1d_ipred_z3_fill2_8bpc_neon, src::ipred::ipred_z1_neon, + src::ipred::ipred_z2_neon, }; #[cfg(feature = "asm")] @@ -514,7 +513,7 @@ unsafe extern "C" fn ipred_z2_neon_erased( max_height: c_int, _bitdepth_max: c_int, ) { - ipred_z2_neon( + ipred_z2_neon::( dst.cast(), stride, topleft_in.cast(), @@ -523,182 +522,10 @@ unsafe extern "C" fn ipred_z2_neon_erased( angle, max_width, max_height, + BitDepth8::new(()), ); } -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -unsafe fn ipred_z2_neon( - dst: *mut pixel, - stride: ptrdiff_t, - topleft_in: *const pixel, - width: c_int, - height: c_int, - mut angle: c_int, - max_width: c_int, - max_height: c_int, -) { - let is_sm = angle >> 9 & 0x1 as c_int; - let enable_intra_edge_filter = angle >> 10; - angle &= 511 as c_int; - if !(angle > 90 && angle < 180) { - unreachable!(); - } - let mut dy = dav1d_dr_intra_derivative[((angle - 90) >> 1) as usize] as c_int; - let mut dx = dav1d_dr_intra_derivative[((180 - angle) >> 1) as usize] as c_int; - let mut buf: [pixel; 3 * (64 + 1)] = [0; 3 * (64 + 1)]; // NOTE: C code doesn't initialize - // The asm can underread below the start of top[] and left[]; to avoid - // surprising behaviour, make sure this is within the allocated stack space. 
- let left_offset: isize = 2 * (64 + 1); - let top_offset: isize = 1 * (64 + 1); - let flipped_offset: isize = 0 * (64 + 1); - - let upsample_left = if enable_intra_edge_filter != 0 { - get_upsample(width + height, 180 - angle, is_sm) - } else { - 0 as c_int - }; - let upsample_above = if enable_intra_edge_filter != 0 { - get_upsample(width + height, angle - 90, is_sm) - } else { - 0 as c_int - }; - - if upsample_above != 0 { - dav1d_ipred_z2_upsample_edge_8bpc_neon( - buf.as_mut_ptr().offset(top_offset).cast(), - width, - topleft_in.cast(), - 8, - ); - dx <<= 1; - } else { - let filter_strength = if enable_intra_edge_filter != 0 { - get_filter_strength(width + height, angle - 90, is_sm) - } else { - 0 as c_int - }; - - if filter_strength != 0 { - dav1d_ipred_z1_filter_edge_8bpc_neon( - buf.as_mut_ptr().offset(1 + top_offset).cast(), - cmp::min(max_width, width), - topleft_in.cast(), - width, - filter_strength, - ); - - if max_width < width { - memcpy( - buf.as_mut_ptr().offset(top_offset + 1 + max_width as isize) as *mut c_void, - topleft_in.offset(1 + max_width as isize) as *const c_void, - ((width - max_width) as usize).wrapping_mul(::core::mem::size_of::()), - ); - } - } else { - BitDepth8::pixel_copy( - &mut buf[1 + top_offset as usize..], - core::slice::from_raw_parts(topleft_in.offset(1), width as usize), - width as usize, - ); - } - } - - if upsample_left != 0 { - buf[flipped_offset as usize] = *topleft_in; - dav1d_ipred_reverse_8bpc_neon( - buf.as_mut_ptr().offset(1 + flipped_offset).cast(), - topleft_in.cast(), - height, - ); - dav1d_ipred_z2_upsample_edge_8bpc_neon( - buf.as_mut_ptr().offset(left_offset).cast(), - height, - buf.as_ptr().offset(flipped_offset).cast(), - 8, - ); - dy <<= 1; - } else { - let filter_strength = if enable_intra_edge_filter != 0 { - get_filter_strength(width + height, 180 - angle, is_sm) - } else { - 0 as c_int - }; - if filter_strength != 0 { - buf[flipped_offset as usize] = *topleft_in; - dav1d_ipred_reverse_8bpc_neon( - 
buf.as_mut_ptr().offset(1 + flipped_offset).cast(), - topleft_in.cast(), - height, - ); - dav1d_ipred_z1_filter_edge_8bpc_neon( - buf.as_mut_ptr().offset(1 + left_offset).cast(), - cmp::min(max_height, height), - buf.as_ptr().offset(flipped_offset).cast(), - height, - filter_strength, - ); - if max_height < height { - memcpy( - buf.as_mut_ptr() - .offset(left_offset + 1 + max_height as isize) - as *mut c_void, - buf.as_mut_ptr() - .offset(flipped_offset + 1 + max_height as isize) - as *const c_void, - ((height - max_height) as usize).wrapping_mul(::core::mem::size_of::()), - ); - } - } else { - dav1d_ipred_reverse_8bpc_neon( - buf.as_mut_ptr().offset(left_offset + 1).cast(), - topleft_in.cast(), - height, - ); - } - } - buf[top_offset as usize] = *topleft_in; - buf[left_offset as usize] = *topleft_in; - - if upsample_above != 0 && upsample_left != 0 { - unreachable!(); - } - - if upsample_above == 0 && upsample_left == 0 { - dav1d_ipred_z2_fill1_8bpc_neon( - dst.cast(), - stride, - buf.as_ptr().offset(top_offset).cast(), - buf.as_ptr().offset(left_offset).cast(), - width, - height, - dx, - dy, - ); - } else if upsample_above != 0 { - dav1d_ipred_z2_fill2_8bpc_neon( - dst.cast(), - stride, - buf.as_ptr().offset(top_offset).cast(), - buf.as_ptr().offset(left_offset).cast(), - width, - height, - dx, - dy, - ); - } else { - dav1d_ipred_z2_fill3_8bpc_neon( - dst.cast(), - stride, - buf.as_ptr().offset(top_offset).cast(), - buf.as_ptr().offset(left_offset).cast(), - width, - height, - dx, - dy, - ); - }; -} - #[cfg(all(feature = "asm", target_arch = "aarch64"))] unsafe extern "C" fn ipred_z1_neon_erased( dst: *mut DynPixel, From 8859113127686b9048528387a0271104ccb6b71f Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Sat, 18 Nov 2023 17:33:47 -0800 Subject: [PATCH 40/49] `fn ipred_z3_neon`: Deduplicate w/ generics. 
--- src/ipred.rs | 131 +++++++++++++++++++++++++++++++++++++++++-- src/ipred_tmpl_16.rs | 131 +------------------------------------------ src/ipred_tmpl_8.rs | 129 +----------------------------------------- 3 files changed, 131 insertions(+), 260 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 53d8ebc5c..a41b91ee4 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -19,18 +19,18 @@ use std::ffi::c_ulonglong; use std::ffi::c_void; use std::slice; -#[cfg(feature = "bitdepth_8")] -use crate::include::common::bitdepth::BitDepth8; - -#[cfg(feature = "bitdepth_16")] -use crate::include::common::bitdepth::BitDepth16; - #[cfg(all(feature = "asm", target_arch = "aarch64"))] use crate::include::common::bitdepth::bd_fn; #[cfg(all(feature = "asm", target_arch = "aarch64"))] use ::to_method::To; +#[cfg(all(feature = "bitdepth_8", feature = "asm", target_arch = "aarch64"))] +use crate::include::common::bitdepth::BitDepth8; + +#[cfg(all(feature = "bitdepth_16", feature = "asm", target_arch = "aarch64"))] +use crate::include::common::bitdepth::BitDepth16; + pub type angular_ipred_fn = unsafe extern "C" fn( *mut DynPixel, ptrdiff_t, @@ -1966,3 +1966,122 @@ pub(crate) unsafe fn ipred_z2_neon( ); }; } + +// TODO(kkysen) Temporarily pub until mod is deduplicated +#[cfg(all(feature = "asm", target_arch = "aarch64"))] +pub(crate) unsafe fn ipred_z3_neon( + dst: *mut BD::Pixel, + stride: ptrdiff_t, + topleft_in: *const BD::Pixel, + width: c_int, + height: c_int, + mut angle: c_int, + _max_width: c_int, + _max_height: c_int, + bd: BD, +) { + let is_sm = angle >> 9 & 0x1 as c_int; + let enable_intra_edge_filter = angle >> 10; + angle &= 511 as c_int; + if !(angle > 180) { + unreachable!(); + } + let mut dy = dav1d_dr_intra_derivative[(270 - angle >> 1) as usize] as c_int; + let mut flipped: [BD::Pixel; 144] = [0.into(); 144]; + let mut left_out: [BD::Pixel; 286] = [0.into(); 286]; + let max_base_y; + let upsample_left = if enable_intra_edge_filter != 0 { + get_upsample(width + 
height, angle - 180, is_sm) + } else { + 0 as c_int + }; + if upsample_left != 0 { + flipped[0] = *topleft_in.offset(0); + bd_fn!(BD, ipred_reverse, neon)( + flipped.as_mut_ptr().offset(1).cast(), + topleft_in.offset(0).cast(), + height + cmp::max(width, height), + ); + bd_fn!(BD, ipred_z1_upsample_edge, neon)( + left_out.as_mut_ptr().cast(), + width + height, + flipped.as_mut_ptr().cast(), + height + cmp::min(width, height), + bd.into_c(), + ); + max_base_y = 2 * (width + height) - 2; + dy <<= 1; + } else { + let filter_strength = if enable_intra_edge_filter != 0 { + get_filter_strength(width + height, angle - 180, is_sm) + } else { + 0 as c_int + }; + if filter_strength != 0 { + flipped[0] = *topleft_in.offset(0); + bd_fn!(BD, ipred_reverse, neon)( + flipped.as_mut_ptr().offset(1).cast(), + topleft_in.offset(0).cast(), + height + cmp::max(width, height), + ); + bd_fn!(BD, ipred_z1_filter_edge, neon)( + left_out.as_mut_ptr().cast(), + width + height, + flipped.as_mut_ptr().cast(), + height + cmp::min(width, height), + filter_strength, + ); + max_base_y = width + height - 1; + } else { + bd_fn!(BD, ipred_reverse, neon)( + left_out.as_mut_ptr().cast(), + topleft_in.offset(0).cast(), + height + cmp::min(width, height), + ); + max_base_y = height + cmp::min(width, height) - 1; + } + } + let base_inc = 1 + upsample_left; + let pad_pixels = cmp::max(64 - max_base_y - 1, height + 15); + { + // `pixel_set` takes a `px: BD::Pixel`. + // Since it's not behind a ptr, we can't make it a `DynPixel` + // and call it uniformly with `bd_fn!`. + let out = left_out + .as_mut_ptr() + .offset((max_base_y + 1) as isize) + .cast(); + let px = left_out[max_base_y as usize]; + let n = (pad_pixels * base_inc) as c_int; + match BD::BPC { + BPC::BPC8 => dav1d_ipred_pixel_set_8bpc_neon( + out, + // Really a no-op cast, but it's difficult to do it properly with generics. 
+ px.to::() as ::Pixel, + n, + ), + BPC::BPC16 => dav1d_ipred_pixel_set_16bpc_neon(out, px.into(), n), + } + } + if upsample_left != 0 { + bd_fn!(BD, ipred_z3_fill2, neon)( + dst.cast(), + stride, + left_out.as_mut_ptr().cast(), + width, + height, + dy, + max_base_y, + ); + } else { + bd_fn!(BD, ipred_z3_fill1, neon)( + dst.cast(), + stride, + left_out.as_mut_ptr().cast(), + width, + height, + dy, + max_base_y, + ); + }; +} diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 9baf73751..0dbd063c9 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -37,19 +37,11 @@ use crate::src::levels::VERT_PRED; use crate::src::levels::Z1_PRED; use crate::src::levels::Z2_PRED; use crate::src::levels::Z3_PRED; -use libc::memcpy; use libc::ptrdiff_t; use std::ffi::c_int; -use std::ffi::c_void; #[cfg(all(feature = "asm", target_arch = "aarch64"))] -use crate::{ - src::ipred::dav1d_ipred_pixel_set_16bpc_neon, src::ipred::dav1d_ipred_reverse_16bpc_neon, - src::ipred::dav1d_ipred_z1_filter_edge_16bpc_neon, - src::ipred::dav1d_ipred_z1_upsample_edge_16bpc_neon, - src::ipred::dav1d_ipred_z3_fill1_16bpc_neon, src::ipred::dav1d_ipred_z3_fill2_16bpc_neon, - src::ipred::ipred_z1_neon, src::ipred::ipred_z2_neon, -}; +use crate::{src::ipred::ipred_z1_neon, src::ipred::ipred_z2_neon, src::ipred::ipred_z3_neon}; #[cfg(feature = "asm")] use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; @@ -57,17 +49,6 @@ use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; #[cfg(feature = "asm")] use cfg_if::cfg_if; -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -use std::cmp; - -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -use crate::{ - src::ipred::get_filter_strength, src::ipred::get_upsample, - src::tables::dav1d_dr_intra_derivative, -}; - -pub type pixel = u16; - unsafe extern "C" fn ipred_z1_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -377,7 +358,7 @@ unsafe extern "C" fn ipred_z3_neon_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_z3_neon( 
+ ipred_z3_neon::( dst.cast(), stride, topleft_in.cast(), @@ -386,114 +367,8 @@ unsafe extern "C" fn ipred_z3_neon_erased( angle, max_width, max_height, - bitdepth_max, - ); -} - -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -unsafe fn ipred_z3_neon( - dst: *mut pixel, - stride: ptrdiff_t, - topleft_in: *const pixel, - width: c_int, - height: c_int, - mut angle: c_int, - _max_width: c_int, - _max_height: c_int, - bitdepth_max: c_int, -) { - let is_sm = angle >> 9 & 0x1 as c_int; - let enable_intra_edge_filter = angle >> 10; - angle &= 511 as c_int; - if !(angle > 180) { - unreachable!(); - } - let mut dy = dav1d_dr_intra_derivative[(270 - angle >> 1) as usize] as c_int; - let mut flipped: [pixel; 144] = [0; 144]; - let mut left_out: [pixel; 286] = [0; 286]; - let max_base_y; - let upsample_left = if enable_intra_edge_filter != 0 { - get_upsample(width + height, angle - 180, is_sm) - } else { - 0 as c_int - }; - if upsample_left != 0 { - flipped[0] = *topleft_in.offset(0); - dav1d_ipred_reverse_16bpc_neon( - flipped.as_mut_ptr().offset(1).cast(), - topleft_in.offset(0).cast(), - height + cmp::max(width, height), - ); - dav1d_ipred_z1_upsample_edge_16bpc_neon( - left_out.as_mut_ptr().cast(), - width + height, - flipped.as_mut_ptr().cast(), - height + cmp::min(width, height), - bitdepth_max, - ); - max_base_y = 2 * (width + height) - 2; - dy <<= 1; - } else { - let filter_strength = if enable_intra_edge_filter != 0 { - get_filter_strength(width + height, angle - 180, is_sm) - } else { - 0 as c_int - }; - if filter_strength != 0 { - flipped[0] = *topleft_in.offset(0); - dav1d_ipred_reverse_16bpc_neon( - flipped.as_mut_ptr().offset(1).cast(), - topleft_in.offset(0).cast(), - height + cmp::max(width, height), - ); - dav1d_ipred_z1_filter_edge_16bpc_neon( - left_out.as_mut_ptr().cast(), - width + height, - flipped.as_mut_ptr().cast(), - height + cmp::min(width, height), - filter_strength, - ); - max_base_y = width + height - 1; - } else { - 
dav1d_ipred_reverse_16bpc_neon( - left_out.as_mut_ptr().cast(), - topleft_in.offset(0).cast(), - height + cmp::min(width, height), - ); - max_base_y = height + cmp::min(width, height) - 1; - } - } - let base_inc = 1 + upsample_left; - let pad_pixels = cmp::max(64 - max_base_y - 1, height + 15); - dav1d_ipred_pixel_set_16bpc_neon( - left_out - .as_mut_ptr() - .offset((max_base_y + 1) as isize) - .cast(), - left_out[max_base_y as usize], - (pad_pixels * base_inc) as c_int, + BitDepth16::from_c(bitdepth_max), ); - if upsample_left != 0 { - dav1d_ipred_z3_fill2_16bpc_neon( - dst.cast(), - stride, - left_out.as_mut_ptr().cast(), - width, - height, - dy, - max_base_y, - ); - } else { - dav1d_ipred_z3_fill1_16bpc_neon( - dst.cast(), - stride, - left_out.as_mut_ptr().cast(), - width, - height, - dy, - max_base_y, - ); - }; } #[cfg(all(feature = "asm", target_arch = "aarch64"))] diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index fc4d988e6..af099d190 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -37,19 +37,11 @@ use crate::src::levels::VERT_PRED; use crate::src::levels::Z1_PRED; use crate::src::levels::Z2_PRED; use crate::src::levels::Z3_PRED; -use libc::memcpy; use libc::ptrdiff_t; use std::ffi::c_int; -use std::ffi::c_void; #[cfg(all(feature = "asm", target_arch = "aarch64"))] -use crate::{ - src::ipred::dav1d_ipred_pixel_set_8bpc_neon, src::ipred::dav1d_ipred_reverse_8bpc_neon, - src::ipred::dav1d_ipred_z1_filter_edge_8bpc_neon, - src::ipred::dav1d_ipred_z1_upsample_edge_8bpc_neon, src::ipred::dav1d_ipred_z3_fill1_8bpc_neon, - src::ipred::dav1d_ipred_z3_fill2_8bpc_neon, src::ipred::ipred_z1_neon, - src::ipred::ipred_z2_neon, -}; +use crate::{src::ipred::ipred_z1_neon, src::ipred::ipred_z2_neon, src::ipred::ipred_z3_neon}; #[cfg(feature = "asm")] use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; @@ -57,17 +49,6 @@ use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; #[cfg(feature = "asm")] use cfg_if::cfg_if; -#[cfg(all(feature = "asm", 
target_arch = "aarch64"))] -use std::cmp; - -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -use crate::{ - src::ipred::get_filter_strength, src::ipred::get_upsample, - src::tables::dav1d_dr_intra_derivative, -}; - -pub type pixel = u8; - unsafe extern "C" fn ipred_z1_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -384,7 +365,7 @@ unsafe extern "C" fn ipred_z3_neon_erased( max_height: c_int, _bitdepth_max: c_int, ) { - ipred_z3_neon( + ipred_z3_neon::( dst.cast(), stride, topleft_in.cast(), @@ -393,114 +374,10 @@ unsafe extern "C" fn ipred_z3_neon_erased( angle, max_width, max_height, + BitDepth8::new(()), ); } -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -unsafe fn ipred_z3_neon( - dst: *mut pixel, - stride: ptrdiff_t, - topleft_in: *const pixel, - width: c_int, - height: c_int, - mut angle: c_int, - _max_width: c_int, - _max_height: c_int, -) { - let is_sm = angle >> 9 & 0x1 as c_int; - let enable_intra_edge_filter = angle >> 10; - angle &= 511 as c_int; - if !(angle > 180) { - unreachable!(); - } - let mut dy = dav1d_dr_intra_derivative[(270 - angle >> 1) as usize] as c_int; - let mut flipped: [pixel; 144] = [0; 144]; - let mut left_out: [pixel; 286] = [0; 286]; - let max_base_y; - let upsample_left = if enable_intra_edge_filter != 0 { - get_upsample(width + height, angle - 180, is_sm) - } else { - 0 as c_int - }; - if upsample_left != 0 { - flipped[0] = *topleft_in.offset(0); - dav1d_ipred_reverse_8bpc_neon( - flipped.as_mut_ptr().offset(1).cast(), - &*topleft_in.offset(0).cast(), - height + cmp::max(width, height), - ); - dav1d_ipred_z1_upsample_edge_8bpc_neon( - left_out.as_mut_ptr().cast(), - width + height, - flipped.as_mut_ptr().cast(), - height + cmp::min(width, height), - 8, - ); - max_base_y = 2 * (width + height) - 2; - dy <<= 1; - } else { - let filter_strength = if enable_intra_edge_filter != 0 { - get_filter_strength(width + height, angle - 180, is_sm) - } else { - 0 as c_int - }; - if filter_strength != 0 { - flipped[0] = 
*topleft_in.offset(0); - dav1d_ipred_reverse_8bpc_neon( - flipped.as_mut_ptr().offset(1).cast(), - topleft_in.offset(0).cast(), - height + cmp::max(width, height), - ); - dav1d_ipred_z1_filter_edge_8bpc_neon( - left_out.as_mut_ptr().cast(), - width + height, - flipped.as_mut_ptr().cast(), - height + cmp::min(width, height), - filter_strength, - ); - max_base_y = width + height - 1; - } else { - dav1d_ipred_reverse_8bpc_neon( - left_out.as_mut_ptr().cast(), - topleft_in.offset(0).cast(), - height + cmp::min(width, height), - ); - max_base_y = height + cmp::min(width, height) - 1; - } - } - let base_inc = 1 + upsample_left; - let pad_pixels = cmp::max(64 - max_base_y - 1, height + 15); - dav1d_ipred_pixel_set_8bpc_neon( - left_out - .as_mut_ptr() - .offset((max_base_y + 1) as isize) - .cast(), - left_out[max_base_y as usize], - (pad_pixels * base_inc) as c_int, - ); - if upsample_left != 0 { - dav1d_ipred_z3_fill2_8bpc_neon( - dst.cast(), - stride, - left_out.as_mut_ptr().cast(), - width, - height, - dy, - max_base_y, - ); - } else { - dav1d_ipred_z3_fill1_8bpc_neon( - dst.cast(), - stride, - left_out.as_mut_ptr().cast(), - width, - height, - dy, - max_base_y, - ); - }; -} - #[cfg(all(feature = "asm", target_arch = "aarch64"))] unsafe extern "C" fn ipred_z2_neon_erased( dst: *mut DynPixel, From 534f9f201057d22b148200f57df3cd258a428d6e Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Sat, 18 Nov 2023 20:12:45 -0800 Subject: [PATCH 41/49] `trait BitDepth`: Remove redundant turbofishes in generic calls. 
--- src/ipred.rs | 28 ++++++++++++++-------------- src/ipred_tmpl_16.rs | 14 +++++++------- src/ipred_tmpl_8.rs | 14 +++++++------- src/looprestoration.rs | 2 +- src/recon.rs | 10 +++++----- 5 files changed, 34 insertions(+), 34 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index a41b91ee4..43d4e5757 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -489,7 +489,7 @@ pub(crate) unsafe extern "C" fn ipred_dc_top_c_erased( _max_height: c_int, bitdepth_max: c_int, ) { - splat_dc::( + splat_dc( dst.cast(), stride, width, @@ -510,7 +510,7 @@ pub(crate) unsafe extern "C" fn ipred_cfl_top_c_erased( alpha: c_int, bitdepth_max: c_int, ) { - cfl_pred::( + cfl_pred( dst.cast(), stride, width, @@ -544,7 +544,7 @@ pub(crate) unsafe extern "C" fn ipred_dc_left_c_erased( _max_height: c_int, bitdepth_max: c_int, ) { - splat_dc::( + splat_dc( dst.cast(), stride, width, @@ -566,7 +566,7 @@ pub(crate) unsafe extern "C" fn ipred_cfl_left_c_erased( bitdepth_max: c_int, ) { let dc: c_uint = dc_gen_left::(topleft.cast(), height); - cfl_pred::( + cfl_pred( dst.cast(), stride, width, @@ -619,7 +619,7 @@ pub(crate) unsafe extern "C" fn ipred_dc_c_erased( _max_height: c_int, bitdepth_max: c_int, ) { - splat_dc::( + splat_dc( dst.cast(), stride, width, @@ -641,7 +641,7 @@ pub(crate) unsafe extern "C" fn ipred_cfl_c_erased( bitdepth_max: c_int, ) { let dc: c_uint = dc_gen::(topleft.cast(), width, height); - cfl_pred::( + cfl_pred( dst.cast(), stride, width, @@ -667,7 +667,7 @@ pub(crate) unsafe extern "C" fn ipred_dc_128_c_erased( ) { let bd = BD::from_c(bitdepth_max); let dc = bd.bitdepth_max().as_::() + 1 >> 1; - splat_dc::(dst.cast(), stride, width, height, dc, bd); + splat_dc(dst.cast(), stride, width, height, dc, bd); } // TODO(kkysen) Temporarily pub until mod is deduplicated @@ -683,7 +683,7 @@ pub(crate) unsafe extern "C" fn ipred_cfl_128_c_erased( ) { let bd = BD::from_c(bitdepth_max); let dc = bd.bitdepth_max().as_::() + 1 >> 1; - cfl_pred::(dst.cast(), stride, width, height, 
dc, ac, alpha, bd); + cfl_pred(dst.cast(), stride, width, height, dc, ac, alpha, bd); } unsafe fn ipred_v_rust( @@ -723,7 +723,7 @@ pub(crate) unsafe extern "C" fn ipred_v_c_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_v_rust::( + ipred_v_rust( dst.cast(), stride, topleft.cast(), @@ -773,7 +773,7 @@ pub(crate) unsafe extern "C" fn ipred_h_c_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_h_rust::( + ipred_h_rust( dst.cast(), stride, topleft.cast(), @@ -835,7 +835,7 @@ pub(crate) unsafe extern "C" fn ipred_paeth_c_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_paeth_rust::( + ipred_paeth_rust( dst.cast(), stride, tl_ptr.cast(), @@ -893,7 +893,7 @@ pub(crate) unsafe extern "C" fn ipred_smooth_c_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_smooth_rust::( + ipred_smooth_rust( dst.cast(), stride, topleft.cast(), @@ -946,7 +946,7 @@ pub(crate) unsafe extern "C" fn ipred_smooth_v_c_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_smooth_v_rust::( + ipred_smooth_v_rust( dst.cast(), stride, topleft.cast(), @@ -999,7 +999,7 @@ pub(crate) unsafe extern "C" fn ipred_smooth_h_c_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_smooth_h_rust::( + ipred_smooth_h_rust( dst.cast(), stride, topleft.cast(), diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 0dbd063c9..d9f61a0e5 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -60,7 +60,7 @@ unsafe extern "C" fn ipred_z1_c_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_z1_rust::( + ipred_z1_rust( dst.cast(), stride, topleft_in.cast(), @@ -84,7 +84,7 @@ unsafe extern "C" fn ipred_z2_c_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_z2_rust::( + ipred_z2_rust( dst.cast(), stride, topleft_in.cast(), @@ -108,7 +108,7 @@ unsafe extern "C" fn ipred_z3_c_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_z3_rust::( + ipred_z3_rust( dst.cast(), stride, topleft_in.cast(), @@ -132,7 +132,7 @@ unsafe extern "C" 
fn ipred_filter_c_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_filter_rust::( + ipred_filter_rust( dst.cast(), stride, topleft_in.cast(), @@ -358,7 +358,7 @@ unsafe extern "C" fn ipred_z3_neon_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_z3_neon::( + ipred_z3_neon( dst.cast(), stride, topleft_in.cast(), @@ -383,7 +383,7 @@ unsafe extern "C" fn ipred_z2_neon_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_z2_neon::( + ipred_z2_neon( dst.cast(), stride, topleft_in.cast(), @@ -408,7 +408,7 @@ unsafe extern "C" fn ipred_z1_neon_erased( max_height: c_int, bitdepth_max: c_int, ) { - ipred_z1_neon::( + ipred_z1_neon( dst.cast(), stride, topleft_in.cast(), diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index af099d190..b4c0b4edb 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -60,7 +60,7 @@ unsafe extern "C" fn ipred_z1_c_erased( max_height: c_int, _bitdepth_max: c_int, ) { - ipred_z1_rust::( + ipred_z1_rust( dst.cast(), stride, topleft_in.cast(), @@ -84,7 +84,7 @@ unsafe extern "C" fn ipred_z2_c_erased( max_height: c_int, _bitdepth_max: c_int, ) { - ipred_z2_rust::( + ipred_z2_rust( dst.cast(), stride, topleft_in.cast(), @@ -108,7 +108,7 @@ unsafe extern "C" fn ipred_z3_c_erased( max_height: c_int, _bitdepth_max: c_int, ) { - ipred_z3_rust::( + ipred_z3_rust( dst.cast(), stride, topleft_in.cast(), @@ -132,7 +132,7 @@ unsafe extern "C" fn ipred_filter_c_erased( max_height: c_int, _bitdepth_max: c_int, ) { - ipred_filter_rust::( + ipred_filter_rust( dst.cast(), stride, topleft_in.cast(), @@ -365,7 +365,7 @@ unsafe extern "C" fn ipred_z3_neon_erased( max_height: c_int, _bitdepth_max: c_int, ) { - ipred_z3_neon::( + ipred_z3_neon( dst.cast(), stride, topleft_in.cast(), @@ -390,7 +390,7 @@ unsafe extern "C" fn ipred_z2_neon_erased( max_height: c_int, _bitdepth_max: c_int, ) { - ipred_z2_neon::( + ipred_z2_neon( dst.cast(), stride, topleft_in.cast(), @@ -415,7 +415,7 @@ unsafe extern "C" fn ipred_z1_neon_erased( 
max_height: c_int, bitdepth_max: c_int, ) { - ipred_z1_neon::( + ipred_z1_neon( dst.cast(), stride, topleft_in.cast(), diff --git a/src/looprestoration.rs b/src/looprestoration.rs index 0a1d0eb50..844bf1bcb 100644 --- a/src/looprestoration.rs +++ b/src/looprestoration.rs @@ -279,7 +279,7 @@ unsafe extern "C" fn wiener_c_erased( bitdepth_max: c_int, ) { let bd = BD::from_c(bitdepth_max); - wiener_rust::( + wiener_rust( p.cast(), stride, left.cast(), diff --git a/src/recon.rs b/src/recon.rs index d967c44bd..d479ef29b 100644 --- a/src/recon.rs +++ b/src/recon.rs @@ -2662,7 +2662,7 @@ pub(crate) unsafe fn rav1d_recon_b_intra( top_sb_edge = top_sb_edge.offset(((*f).sb128w * 128 * (sby - 1)) as isize); } - m = rav1d_prepare_intra_edges::( + m = rav1d_prepare_intra_edges( t.bx, (t.bx > (*ts).tiling.col_start) as c_int, t.by, @@ -2871,7 +2871,7 @@ pub(crate) unsafe fn rav1d_recon_b_intra( let ypos = t.by >> ss_ver; let xstart = (*ts).tiling.col_start >> ss_hor; let ystart = (*ts).tiling.row_start >> ss_ver; - let m: IntraPredMode = rav1d_prepare_intra_edges::( + let m: IntraPredMode = rav1d_prepare_intra_edges( xpos, (xpos > xstart) as c_int, ypos, @@ -3072,7 +3072,7 @@ pub(crate) unsafe fn rav1d_recon_b_intra( ypos = t.by >> ss_ver; xstart = (*ts).tiling.col_start >> ss_hor; ystart = (*ts).tiling.row_start >> ss_ver; - m = rav1d_prepare_intra_edges::( + m = rav1d_prepare_intra_edges( xpos, (xpos > xstart) as c_int, ypos, @@ -3461,7 +3461,7 @@ pub(crate) unsafe fn rav1d_recon_b_inter( let sby = t.by >> (*f).sb_shift; top_sb_edge = top_sb_edge.offset(((*f).sb128w * 128 * (sby - 1)) as isize); } - m = rav1d_prepare_intra_edges::( + m = rav1d_prepare_intra_edges( t.bx, (t.bx > (*ts).tiling.col_start) as c_int, t.by, @@ -3882,7 +3882,7 @@ pub(crate) unsafe fn rav1d_recon_b_inter( top_sb_edge = top_sb_edge.offset(((*f).sb128w * 128 * (sby - 1)) as isize); } - m = rav1d_prepare_intra_edges::( + m = rav1d_prepare_intra_edges( t.bx >> ss_hor, (t.bx >> ss_hor > 
(*ts).tiling.col_start >> ss_hor) as c_int, t.by >> ss_ver, From 1f62e9f85ce0b17e0658bc3b9f2e545a0d6d2c1d Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Sat, 18 Nov 2023 20:20:41 -0800 Subject: [PATCH 42/49] `fn ipred_z{1,2,3}_c_erased`: Deduplicate w/ generics. --- src/ipred.rs | 84 ++++++++++++++++++++++++++++++++++++++++---- src/ipred_tmpl_16.rs | 84 ++++---------------------------------------- src/ipred_tmpl_8.rs | 84 ++++---------------------------------------- 3 files changed, 90 insertions(+), 162 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 43d4e5757..c6f1aa60b 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -1140,8 +1140,7 @@ unsafe fn upsample_edge( *out.offset((i * 2) as isize) = *in_0.offset(iclip(i, from, to - 1) as isize); } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe fn ipred_z1_rust( +unsafe fn ipred_z1_rust( mut dst: *mut BD::Pixel, stride: ptrdiff_t, topleft_in: *const BD::Pixel, @@ -1234,8 +1233,7 @@ pub(crate) unsafe fn ipred_z1_rust( } } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe fn ipred_z2_rust( +unsafe fn ipred_z2_rust( mut dst: *mut BD::Pixel, stride: ptrdiff_t, topleft_in: *const BD::Pixel, @@ -1372,8 +1370,7 @@ pub(crate) unsafe fn ipred_z2_rust( } } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe fn ipred_z3_rust( +unsafe fn ipred_z3_rust( dst: *mut BD::Pixel, stride: ptrdiff_t, topleft_in: *const BD::Pixel, @@ -1472,6 +1469,81 @@ pub(crate) unsafe fn ipred_z3_rust( } } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe extern "C" fn ipred_z1_c_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + topleft_in: *const DynPixel, + width: c_int, + height: c_int, + angle: c_int, + max_width: c_int, + max_height: c_int, + bitdepth_max: c_int, +) { + ipred_z1_rust( + dst.cast(), + stride, + topleft_in.cast(), + width, + height, + angle, + max_width, + max_height, + BD::from_c(bitdepth_max), + ); +} + +// 
TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe extern "C" fn ipred_z2_c_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + topleft_in: *const DynPixel, + width: c_int, + height: c_int, + angle: c_int, + max_width: c_int, + max_height: c_int, + bitdepth_max: c_int, +) { + ipred_z2_rust( + dst.cast(), + stride, + topleft_in.cast(), + width, + height, + angle, + max_width, + max_height, + BD::from_c(bitdepth_max), + ); +} + +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe extern "C" fn ipred_z3_c_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + topleft_in: *const DynPixel, + width: c_int, + height: c_int, + angle: c_int, + max_width: c_int, + max_height: c_int, + bitdepth_max: c_int, +) { + ipred_z3_rust( + dst.cast(), + stride, + topleft_in.cast(), + width, + height, + angle, + max_width, + max_height, + BD::from_c(bitdepth_max), + ); +} + // TODO(kkysen) Temporarily pub until mod is deduplicated pub(crate) unsafe fn filter_fn( flt_ptr: *const i8, diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index d9f61a0e5..177f678b5 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -18,9 +18,9 @@ use crate::src::ipred::ipred_smooth_c_erased; use crate::src::ipred::ipred_smooth_h_c_erased; use crate::src::ipred::ipred_smooth_v_c_erased; use crate::src::ipred::ipred_v_c_erased; -use crate::src::ipred::ipred_z1_rust; -use crate::src::ipred::ipred_z2_rust; -use crate::src::ipred::ipred_z3_rust; +use crate::src::ipred::ipred_z1_c_erased; +use crate::src::ipred::ipred_z2_c_erased; +use crate::src::ipred::ipred_z3_c_erased; use crate::src::ipred::pal_pred_rust; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; @@ -49,78 +49,6 @@ use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; #[cfg(feature = "asm")] use cfg_if::cfg_if; -unsafe extern "C" fn ipred_z1_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft_in: *const DynPixel, - width: c_int, - height: 
c_int, - angle: c_int, - max_width: c_int, - max_height: c_int, - bitdepth_max: c_int, -) { - ipred_z1_rust( - dst.cast(), - stride, - topleft_in.cast(), - width, - height, - angle, - max_width, - max_height, - BitDepth16::from_c(bitdepth_max), - ); -} - -unsafe extern "C" fn ipred_z2_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft_in: *const DynPixel, - width: c_int, - height: c_int, - angle: c_int, - max_width: c_int, - max_height: c_int, - bitdepth_max: c_int, -) { - ipred_z2_rust( - dst.cast(), - stride, - topleft_in.cast(), - width, - height, - angle, - max_width, - max_height, - BitDepth16::from_c(bitdepth_max), - ); -} - -unsafe extern "C" fn ipred_z3_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft_in: *const DynPixel, - width: c_int, - height: c_int, - angle: c_int, - max_width: c_int, - max_height: c_int, - bitdepth_max: c_int, -) { - ipred_z3_rust( - dst.cast(), - stride, - topleft_in.cast(), - width, - height, - angle, - max_width, - max_height, - BitDepth16::from_c(bitdepth_max), - ); -} - unsafe extern "C" fn ipred_filter_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -433,9 +361,9 @@ pub unsafe fn rav1d_intra_pred_dsp_init_16bpc(c: *mut Rav1dIntraPredDSPContext) (*c).intra_pred[SMOOTH_PRED as usize] = Some(ipred_smooth_c_erased::); (*c).intra_pred[SMOOTH_V_PRED as usize] = Some(ipred_smooth_v_c_erased::); (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(ipred_smooth_h_c_erased::); - (*c).intra_pred[Z1_PRED as usize] = Some(ipred_z1_c_erased); - (*c).intra_pred[Z2_PRED as usize] = Some(ipred_z2_c_erased); - (*c).intra_pred[Z3_PRED as usize] = Some(ipred_z3_c_erased); + (*c).intra_pred[Z1_PRED as usize] = Some(ipred_z1_c_erased::); + (*c).intra_pred[Z2_PRED as usize] = Some(ipred_z2_c_erased::); + (*c).intra_pred[Z3_PRED as usize] = Some(ipred_z3_c_erased::); (*c).intra_pred[FILTER_PRED as usize] = Some(ipred_filter_c_erased); (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = cfl_ac_420_c_erased; diff --git 
a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index b4c0b4edb..bba45c37c 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -18,9 +18,9 @@ use crate::src::ipred::ipred_smooth_c_erased; use crate::src::ipred::ipred_smooth_h_c_erased; use crate::src::ipred::ipred_smooth_v_c_erased; use crate::src::ipred::ipred_v_c_erased; -use crate::src::ipred::ipred_z1_rust; -use crate::src::ipred::ipred_z2_rust; -use crate::src::ipred::ipred_z3_rust; +use crate::src::ipred::ipred_z1_c_erased; +use crate::src::ipred::ipred_z2_c_erased; +use crate::src::ipred::ipred_z3_c_erased; use crate::src::ipred::pal_pred_rust; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; @@ -49,78 +49,6 @@ use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; #[cfg(feature = "asm")] use cfg_if::cfg_if; -unsafe extern "C" fn ipred_z1_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft_in: *const DynPixel, - width: c_int, - height: c_int, - angle: c_int, - max_width: c_int, - max_height: c_int, - _bitdepth_max: c_int, -) { - ipred_z1_rust( - dst.cast(), - stride, - topleft_in.cast(), - width, - height, - angle, - max_width, - max_height, - BitDepth8::new(()), - ); -} - -unsafe extern "C" fn ipred_z2_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft_in: *const DynPixel, - width: c_int, - height: c_int, - angle: c_int, - max_width: c_int, - max_height: c_int, - _bitdepth_max: c_int, -) { - ipred_z2_rust( - dst.cast(), - stride, - topleft_in.cast(), - width, - height, - angle, - max_width, - max_height, - BitDepth8::new(()), - ); -} - -unsafe extern "C" fn ipred_z3_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft_in: *const DynPixel, - width: c_int, - height: c_int, - angle: c_int, - max_width: c_int, - max_height: c_int, - _bitdepth_max: c_int, -) { - ipred_z3_rust( - dst.cast(), - stride, - topleft_in.cast(), - width, - height, - angle, - max_width, - max_height, - BitDepth8::new(()), - ); -} - unsafe extern "C" fn 
ipred_filter_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -440,9 +368,9 @@ pub unsafe fn rav1d_intra_pred_dsp_init_8bpc(c: *mut Rav1dIntraPredDSPContext) { (*c).intra_pred[SMOOTH_PRED as usize] = Some(ipred_smooth_c_erased::); (*c).intra_pred[SMOOTH_V_PRED as usize] = Some(ipred_smooth_v_c_erased::); (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(ipred_smooth_h_c_erased::); - (*c).intra_pred[Z1_PRED as usize] = Some(ipred_z1_c_erased); - (*c).intra_pred[Z2_PRED as usize] = Some(ipred_z2_c_erased); - (*c).intra_pred[Z3_PRED as usize] = Some(ipred_z3_c_erased); + (*c).intra_pred[Z1_PRED as usize] = Some(ipred_z1_c_erased::); + (*c).intra_pred[Z2_PRED as usize] = Some(ipred_z2_c_erased::); + (*c).intra_pred[Z3_PRED as usize] = Some(ipred_z3_c_erased::); (*c).intra_pred[FILTER_PRED as usize] = Some(ipred_filter_c_erased); (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = cfl_ac_420_c_erased; From 7865b4a25e5791086e2cc87d34c6c856fb6112e2 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Sat, 18 Nov 2023 20:26:10 -0800 Subject: [PATCH 43/49] `fn ipred_filter_c_erased`: Deduplicate w/ generics. --- src/ipred.rs | 28 ++++++++++++++++++++++++++-- src/ipred_tmpl_16.rs | 34 ++++++---------------------------- src/ipred_tmpl_8.rs | 34 ++++++---------------------------- 3 files changed, 38 insertions(+), 58 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index c6f1aa60b..ab1d8a7e6 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -1584,8 +1584,7 @@ cfg_if! 
{ } } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe fn ipred_filter_rust( +unsafe fn ipred_filter_rust( mut dst: *mut BD::Pixel, stride: ptrdiff_t, topleft_in: *const BD::Pixel, @@ -1643,6 +1642,31 @@ pub(crate) unsafe fn ipred_filter_rust( } } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe extern "C" fn ipred_filter_c_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + topleft_in: *const DynPixel, + width: c_int, + height: c_int, + filt_idx: c_int, + max_width: c_int, + max_height: c_int, + bitdepth_max: c_int, +) { + ipred_filter_rust( + dst.cast(), + stride, + topleft_in.cast(), + width, + height, + filt_idx, + max_width, + max_height, + BD::from_c(bitdepth_max), + ); +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] pub(crate) unsafe fn cfl_ac_rust( diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 177f678b5..2d6a71e6c 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -1,4 +1,3 @@ -use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::BitDepth16; use crate::include::common::bitdepth::DynPixel; use crate::include::dav1d::headers::Rav1dPixelLayout; @@ -11,7 +10,7 @@ use crate::src::ipred::ipred_dc_128_c_erased; use crate::src::ipred::ipred_dc_c_erased; use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; -use crate::src::ipred::ipred_filter_rust; +use crate::src::ipred::ipred_filter_c_erased; use crate::src::ipred::ipred_h_c_erased; use crate::src::ipred::ipred_paeth_c_erased; use crate::src::ipred::ipred_smooth_c_erased; @@ -41,7 +40,10 @@ use libc::ptrdiff_t; use std::ffi::c_int; #[cfg(all(feature = "asm", target_arch = "aarch64"))] -use crate::{src::ipred::ipred_z1_neon, src::ipred::ipred_z2_neon, src::ipred::ipred_z3_neon}; +use crate::{ + include::common::bitdepth::BitDepth, src::ipred::ipred_z1_neon, src::ipred::ipred_z2_neon, + src::ipred::ipred_z3_neon, +}; 
#[cfg(feature = "asm")] use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; @@ -49,30 +51,6 @@ use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; #[cfg(feature = "asm")] use cfg_if::cfg_if; -unsafe extern "C" fn ipred_filter_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft_in: *const DynPixel, - width: c_int, - height: c_int, - filt_idx: c_int, - max_width: c_int, - max_height: c_int, - bitdepth_max: c_int, -) { - ipred_filter_rust( - dst.cast(), - stride, - topleft_in.cast(), - width, - height, - filt_idx, - max_width, - max_height, - BitDepth16::from_c(bitdepth_max), - ); -} - unsafe extern "C" fn cfl_ac_420_c_erased( ac: *mut i16, ypx: *const DynPixel, @@ -364,7 +342,7 @@ pub unsafe fn rav1d_intra_pred_dsp_init_16bpc(c: *mut Rav1dIntraPredDSPContext) (*c).intra_pred[Z1_PRED as usize] = Some(ipred_z1_c_erased::); (*c).intra_pred[Z2_PRED as usize] = Some(ipred_z2_c_erased::); (*c).intra_pred[Z3_PRED as usize] = Some(ipred_z3_c_erased::); - (*c).intra_pred[FILTER_PRED as usize] = Some(ipred_filter_c_erased); + (*c).intra_pred[FILTER_PRED as usize] = Some(ipred_filter_c_erased::); (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = cfl_ac_420_c_erased; (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = cfl_ac_422_c_erased; diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index bba45c37c..ac7ac1b76 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -1,4 +1,3 @@ -use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::BitDepth8; use crate::include::common::bitdepth::DynPixel; use crate::include::dav1d::headers::Rav1dPixelLayout; @@ -11,7 +10,7 @@ use crate::src::ipred::ipred_dc_128_c_erased; use crate::src::ipred::ipred_dc_c_erased; use crate::src::ipred::ipred_dc_left_c_erased; use crate::src::ipred::ipred_dc_top_c_erased; -use crate::src::ipred::ipred_filter_rust; +use crate::src::ipred::ipred_filter_c_erased; use crate::src::ipred::ipred_h_c_erased; use crate::src::ipred::ipred_paeth_c_erased; use 
crate::src::ipred::ipred_smooth_c_erased; @@ -41,7 +40,10 @@ use libc::ptrdiff_t; use std::ffi::c_int; #[cfg(all(feature = "asm", target_arch = "aarch64"))] -use crate::{src::ipred::ipred_z1_neon, src::ipred::ipred_z2_neon, src::ipred::ipred_z3_neon}; +use crate::{ + include::common::bitdepth::BitDepth, src::ipred::ipred_z1_neon, src::ipred::ipred_z2_neon, + src::ipred::ipred_z3_neon, +}; #[cfg(feature = "asm")] use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; @@ -49,30 +51,6 @@ use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; #[cfg(feature = "asm")] use cfg_if::cfg_if; -unsafe extern "C" fn ipred_filter_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft_in: *const DynPixel, - width: c_int, - height: c_int, - filt_idx: c_int, - max_width: c_int, - max_height: c_int, - _bitdepth_max: c_int, -) { - ipred_filter_rust( - dst.cast(), - stride, - topleft_in.cast(), - width, - height, - filt_idx, - max_width, - max_height, - BitDepth8::new(()), - ); -} - unsafe extern "C" fn cfl_ac_420_c_erased( ac: *mut i16, ypx: *const DynPixel, @@ -371,7 +349,7 @@ pub unsafe fn rav1d_intra_pred_dsp_init_8bpc(c: *mut Rav1dIntraPredDSPContext) { (*c).intra_pred[Z1_PRED as usize] = Some(ipred_z1_c_erased::); (*c).intra_pred[Z2_PRED as usize] = Some(ipred_z2_c_erased::); (*c).intra_pred[Z3_PRED as usize] = Some(ipred_z3_c_erased::); - (*c).intra_pred[FILTER_PRED as usize] = Some(ipred_filter_c_erased); + (*c).intra_pred[FILTER_PRED as usize] = Some(ipred_filter_c_erased::); (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = cfl_ac_420_c_erased; (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = cfl_ac_422_c_erased; From 5e0786264ee357a5fe2817e3018ce81d71e0013e Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Sat, 18 Nov 2023 20:29:20 -0800 Subject: [PATCH 44/49] `fn cfl_ac_4{20,22,44}_c_erased`: Deduplicate w/ generics. 
--- src/ipred.rs | 72 +++++++++++++++++++++++++++++++++++++++-- src/ipred_tmpl_16.rs | 76 ++++---------------------------------------- src/ipred_tmpl_8.rs | 76 ++++---------------------------------------- 3 files changed, 82 insertions(+), 142 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index ab1d8a7e6..81357958b 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -1667,9 +1667,8 @@ pub(crate) unsafe extern "C" fn ipred_filter_c_erased( ); } -// TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] -pub(crate) unsafe fn cfl_ac_rust( +unsafe fn cfl_ac_rust( mut ac: *mut i16, mut ypx: *const BD::Pixel, stride: ptrdiff_t, @@ -1758,6 +1757,75 @@ pub(crate) unsafe fn cfl_ac_rust( } } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe extern "C" fn cfl_ac_420_c_erased( + ac: *mut i16, + ypx: *const DynPixel, + stride: ptrdiff_t, + w_pad: c_int, + h_pad: c_int, + cw: c_int, + ch: c_int, +) { + cfl_ac_rust::( + ac, + ypx.cast(), + stride, + w_pad, + h_pad, + cw, + ch, + 1 as c_int, + 1 as c_int, + ); +} + +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe extern "C" fn cfl_ac_422_c_erased( + ac: *mut i16, + ypx: *const DynPixel, + stride: ptrdiff_t, + w_pad: c_int, + h_pad: c_int, + cw: c_int, + ch: c_int, +) { + cfl_ac_rust::( + ac, + ypx.cast(), + stride, + w_pad, + h_pad, + cw, + ch, + 1 as c_int, + 0 as c_int, + ); +} + +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe extern "C" fn cfl_ac_444_c_erased( + ac: *mut i16, + ypx: *const DynPixel, + stride: ptrdiff_t, + w_pad: c_int, + h_pad: c_int, + cw: c_int, + ch: c_int, +) { + cfl_ac_rust::( + ac, + ypx.cast(), + stride, + w_pad, + h_pad, + cw, + ch, + 0 as c_int, + 0 as c_int, + ); +} + // TODO(kkysen) Temporarily pub until mod is deduplicated pub(crate) unsafe fn pal_pred_rust( mut dst: *mut BD::Pixel, diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 2d6a71e6c..6ee5f7b7b 100644 --- 
a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -1,7 +1,9 @@ use crate::include::common::bitdepth::BitDepth16; use crate::include::common::bitdepth::DynPixel; use crate::include::dav1d::headers::Rav1dPixelLayout; -use crate::src::ipred::cfl_ac_rust; +use crate::src::ipred::cfl_ac_420_c_erased; +use crate::src::ipred::cfl_ac_422_c_erased; +use crate::src::ipred::cfl_ac_444_c_erased; use crate::src::ipred::ipred_cfl_128_c_erased; use crate::src::ipred::ipred_cfl_c_erased; use crate::src::ipred::ipred_cfl_left_c_erased; @@ -51,72 +53,6 @@ use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; #[cfg(feature = "asm")] use cfg_if::cfg_if; -unsafe extern "C" fn cfl_ac_420_c_erased( - ac: *mut i16, - ypx: *const DynPixel, - stride: ptrdiff_t, - w_pad: c_int, - h_pad: c_int, - cw: c_int, - ch: c_int, -) { - cfl_ac_rust::( - ac, - ypx.cast(), - stride, - w_pad, - h_pad, - cw, - ch, - 1 as c_int, - 1 as c_int, - ); -} - -unsafe extern "C" fn cfl_ac_422_c_erased( - ac: *mut i16, - ypx: *const DynPixel, - stride: ptrdiff_t, - w_pad: c_int, - h_pad: c_int, - cw: c_int, - ch: c_int, -) { - cfl_ac_rust::( - ac, - ypx.cast(), - stride, - w_pad, - h_pad, - cw, - ch, - 1 as c_int, - 0 as c_int, - ); -} - -unsafe extern "C" fn cfl_ac_444_c_erased( - ac: *mut i16, - ypx: *const DynPixel, - stride: ptrdiff_t, - w_pad: c_int, - h_pad: c_int, - cw: c_int, - ch: c_int, -) { - cfl_ac_rust::( - ac, - ypx.cast(), - stride, - w_pad, - h_pad, - cw, - ch, - 0 as c_int, - 0 as c_int, - ); -} - unsafe extern "C" fn pal_pred_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, @@ -344,9 +280,9 @@ pub unsafe fn rav1d_intra_pred_dsp_init_16bpc(c: *mut Rav1dIntraPredDSPContext) (*c).intra_pred[Z3_PRED as usize] = Some(ipred_z3_c_erased::); (*c).intra_pred[FILTER_PRED as usize] = Some(ipred_filter_c_erased::); - (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = cfl_ac_420_c_erased; - (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = cfl_ac_422_c_erased; - (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 
1] = cfl_ac_444_c_erased; + (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = cfl_ac_420_c_erased::; + (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = cfl_ac_422_c_erased::; + (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = cfl_ac_444_c_erased::; (*c).cfl_pred[DC_PRED as usize] = ipred_cfl_c_erased::; (*c).cfl_pred[DC_128_PRED as usize] = ipred_cfl_128_c_erased::; diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index ac7ac1b76..b9cdd8ee4 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -1,7 +1,9 @@ use crate::include::common::bitdepth::BitDepth8; use crate::include::common::bitdepth::DynPixel; use crate::include::dav1d::headers::Rav1dPixelLayout; -use crate::src::ipred::cfl_ac_rust; +use crate::src::ipred::cfl_ac_420_c_erased; +use crate::src::ipred::cfl_ac_422_c_erased; +use crate::src::ipred::cfl_ac_444_c_erased; use crate::src::ipred::ipred_cfl_128_c_erased; use crate::src::ipred::ipred_cfl_c_erased; use crate::src::ipred::ipred_cfl_left_c_erased; @@ -51,72 +53,6 @@ use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; #[cfg(feature = "asm")] use cfg_if::cfg_if; -unsafe extern "C" fn cfl_ac_420_c_erased( - ac: *mut i16, - ypx: *const DynPixel, - stride: ptrdiff_t, - w_pad: c_int, - h_pad: c_int, - cw: c_int, - ch: c_int, -) { - cfl_ac_rust::( - ac, - ypx.cast(), - stride, - w_pad, - h_pad, - cw, - ch, - 1 as c_int, - 1 as c_int, - ); -} - -unsafe extern "C" fn cfl_ac_422_c_erased( - ac: *mut i16, - ypx: *const DynPixel, - stride: ptrdiff_t, - w_pad: c_int, - h_pad: c_int, - cw: c_int, - ch: c_int, -) { - cfl_ac_rust::( - ac, - ypx.cast(), - stride, - w_pad, - h_pad, - cw, - ch, - 1 as c_int, - 0 as c_int, - ); -} - -unsafe extern "C" fn cfl_ac_444_c_erased( - ac: *mut i16, - ypx: *const DynPixel, - stride: ptrdiff_t, - w_pad: c_int, - h_pad: c_int, - cw: c_int, - ch: c_int, -) { - cfl_ac_rust::( - ac, - ypx.cast(), - stride, - w_pad, - h_pad, - cw, - ch, - 0 as c_int, - 0 as c_int, - ); -} - unsafe extern "C" fn pal_pred_c_erased( dst: 
*mut DynPixel, stride: ptrdiff_t, @@ -351,9 +287,9 @@ pub unsafe fn rav1d_intra_pred_dsp_init_8bpc(c: *mut Rav1dIntraPredDSPContext) { (*c).intra_pred[Z3_PRED as usize] = Some(ipred_z3_c_erased::); (*c).intra_pred[FILTER_PRED as usize] = Some(ipred_filter_c_erased::); - (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = cfl_ac_420_c_erased; - (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = cfl_ac_422_c_erased; - (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = cfl_ac_444_c_erased; + (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = cfl_ac_420_c_erased::; + (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = cfl_ac_422_c_erased::; + (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = cfl_ac_444_c_erased::; (*c).cfl_pred[DC_PRED as usize] = ipred_cfl_c_erased::; (*c).cfl_pred[DC_128_PRED as usize] = ipred_cfl_128_c_erased::; (*c).cfl_pred[TOP_DC_PRED as usize] = ipred_cfl_top_c_erased::; From e3368f8e2b9e8c4707509d4a571684bb4fda9a6d Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Sat, 18 Nov 2023 20:33:32 -0800 Subject: [PATCH 45/49] `fn pal_pred_c_erased`: Deduplicate w/ generics. 
--- src/ipred.rs | 15 +++++++++++++-- src/ipred_tmpl_16.rs | 25 +++++++------------------ src/ipred_tmpl_8.rs | 25 +++++++------------------ 3 files changed, 27 insertions(+), 38 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 81357958b..820c277f3 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -1826,8 +1826,7 @@ pub(crate) unsafe extern "C" fn cfl_ac_444_c_erased( ); } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe fn pal_pred_rust( +unsafe fn pal_pred_rust( mut dst: *mut BD::Pixel, stride: ptrdiff_t, pal: *const u16, @@ -1849,6 +1848,18 @@ pub(crate) unsafe fn pal_pred_rust( } } +// TODO(kkysen) Temporarily pub until mod is deduplicated +pub(crate) unsafe extern "C" fn pal_pred_c_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + pal: *const u16, + idx: *const u8, + w: c_int, + h: c_int, +) { + pal_pred_rust::(dst.cast(), stride, pal, idx, w, h); +} + // TODO(kkysen) Temporarily pub until mod is deduplicated #[cfg(all(feature = "asm", target_arch = "aarch64"))] pub(crate) unsafe fn ipred_z1_neon( diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 6ee5f7b7b..081d3f2e8 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -1,5 +1,4 @@ use crate::include::common::bitdepth::BitDepth16; -use crate::include::common::bitdepth::DynPixel; use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::src::ipred::cfl_ac_420_c_erased; use crate::src::ipred::cfl_ac_422_c_erased; @@ -22,7 +21,7 @@ use crate::src::ipred::ipred_v_c_erased; use crate::src::ipred::ipred_z1_c_erased; use crate::src::ipred::ipred_z2_c_erased; use crate::src::ipred::ipred_z3_c_erased; -use crate::src::ipred::pal_pred_rust; +use crate::src::ipred::pal_pred_c_erased; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; use crate::src::levels::DC_PRED; @@ -38,32 +37,22 @@ use crate::src::levels::VERT_PRED; use crate::src::levels::Z1_PRED; use crate::src::levels::Z2_PRED; use 
crate::src::levels::Z3_PRED; -use libc::ptrdiff_t; -use std::ffi::c_int; #[cfg(all(feature = "asm", target_arch = "aarch64"))] use crate::{ - include::common::bitdepth::BitDepth, src::ipred::ipred_z1_neon, src::ipred::ipred_z2_neon, - src::ipred::ipred_z3_neon, + include::common::bitdepth::BitDepth, include::common::bitdepth::DynPixel, + src::ipred::ipred_z1_neon, src::ipred::ipred_z2_neon, src::ipred::ipred_z3_neon, }; +#[cfg(all(feature = "asm", target_arch = "aarch64"))] +use ::{libc::ptrdiff_t, std::ffi::c_int}; + #[cfg(feature = "asm")] use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; #[cfg(feature = "asm")] use cfg_if::cfg_if; -unsafe extern "C" fn pal_pred_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - pal: *const u16, - idx: *const u8, - w: c_int, - h: c_int, -) { - pal_pred_rust::(dst.cast(), stride, pal, idx, w, h); -} - #[cfg(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64"),))] #[inline(always)] unsafe fn intra_pred_dsp_init_x86(c: *mut Rav1dIntraPredDSPContext) { @@ -289,7 +278,7 @@ pub unsafe fn rav1d_intra_pred_dsp_init_16bpc(c: *mut Rav1dIntraPredDSPContext) (*c).cfl_pred[TOP_DC_PRED as usize] = ipred_cfl_top_c_erased::; (*c).cfl_pred[LEFT_DC_PRED as usize] = ipred_cfl_left_c_erased::; - (*c).pal_pred = pal_pred_c_erased; + (*c).pal_pred = pal_pred_c_erased::; #[cfg(feature = "asm")] cfg_if! 
{ diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index b9cdd8ee4..d355c7832 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -1,5 +1,4 @@ use crate::include::common::bitdepth::BitDepth8; -use crate::include::common::bitdepth::DynPixel; use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::src::ipred::cfl_ac_420_c_erased; use crate::src::ipred::cfl_ac_422_c_erased; @@ -22,7 +21,7 @@ use crate::src::ipred::ipred_v_c_erased; use crate::src::ipred::ipred_z1_c_erased; use crate::src::ipred::ipred_z2_c_erased; use crate::src::ipred::ipred_z3_c_erased; -use crate::src::ipred::pal_pred_rust; +use crate::src::ipred::pal_pred_c_erased; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::levels::DC_128_PRED; use crate::src::levels::DC_PRED; @@ -38,32 +37,22 @@ use crate::src::levels::VERT_PRED; use crate::src::levels::Z1_PRED; use crate::src::levels::Z2_PRED; use crate::src::levels::Z3_PRED; -use libc::ptrdiff_t; -use std::ffi::c_int; #[cfg(all(feature = "asm", target_arch = "aarch64"))] use crate::{ - include::common::bitdepth::BitDepth, src::ipred::ipred_z1_neon, src::ipred::ipred_z2_neon, - src::ipred::ipred_z3_neon, + include::common::bitdepth::BitDepth, include::common::bitdepth::DynPixel, + src::ipred::ipred_z1_neon, src::ipred::ipred_z2_neon, src::ipred::ipred_z3_neon, }; +#[cfg(all(feature = "asm", target_arch = "aarch64"))] +use ::{libc::ptrdiff_t, std::ffi::c_int}; + #[cfg(feature = "asm")] use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; #[cfg(feature = "asm")] use cfg_if::cfg_if; -unsafe extern "C" fn pal_pred_c_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - pal: *const u16, - idx: *const u8, - w: c_int, - h: c_int, -) { - pal_pred_rust::(dst.cast(), stride, pal, idx, w, h); -} - #[cfg(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64"),))] #[inline(always)] unsafe fn intra_pred_dsp_init_x86(c: *mut Rav1dIntraPredDSPContext) { @@ -295,7 +284,7 @@ pub unsafe fn 
rav1d_intra_pred_dsp_init_8bpc(c: *mut Rav1dIntraPredDSPContext) { (*c).cfl_pred[TOP_DC_PRED as usize] = ipred_cfl_top_c_erased::; (*c).cfl_pred[LEFT_DC_PRED as usize] = ipred_cfl_left_c_erased::; - (*c).pal_pred = pal_pred_c_erased; + (*c).pal_pred = pal_pred_c_erased::; #[cfg(feature = "asm")] cfg_if! { From 59cf2a8e7eb0de05c391b2627f76f8eac59461b5 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Sat, 18 Nov 2023 20:54:50 -0800 Subject: [PATCH 46/49] `fn ipred_z{1,2,3}_neon_erased`: Deduplicate w/ generics. --- src/ipred.rs | 87 +++++++++++++++++++++++++++++++++++++++--- src/ipred_tmpl_16.rs | 90 ++------------------------------------------ src/ipred_tmpl_8.rs | 90 ++------------------------------------------ 3 files changed, 87 insertions(+), 180 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 820c277f3..b16ea350b 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -1860,9 +1860,8 @@ pub(crate) unsafe extern "C" fn pal_pred_c_erased( pal_pred_rust::(dst.cast(), stride, pal, idx, w, h); } -// TODO(kkysen) Temporarily pub until mod is deduplicated #[cfg(all(feature = "asm", target_arch = "aarch64"))] -pub(crate) unsafe fn ipred_z1_neon( +unsafe fn ipred_z1_neon( dst: *mut BD::Pixel, stride: ptrdiff_t, topleft_in: *const BD::Pixel, @@ -1964,9 +1963,8 @@ pub(crate) unsafe fn ipred_z1_neon( }; } -// TODO(kkysen) Temporarily pub until mod is deduplicated #[cfg(all(feature = "asm", target_arch = "aarch64"))] -pub(crate) unsafe fn ipred_z2_neon( +unsafe fn ipred_z2_neon( dst: *mut BD::Pixel, stride: ptrdiff_t, topleft_in: *const BD::Pixel, @@ -2142,9 +2140,8 @@ pub(crate) unsafe fn ipred_z2_neon( }; } -// TODO(kkysen) Temporarily pub until mod is deduplicated #[cfg(all(feature = "asm", target_arch = "aarch64"))] -pub(crate) unsafe fn ipred_z3_neon( +unsafe fn ipred_z3_neon( dst: *mut BD::Pixel, stride: ptrdiff_t, topleft_in: *const BD::Pixel, @@ -2260,3 +2257,81 @@ pub(crate) unsafe fn ipred_z3_neon( ); }; } + +// TODO(kkysen) Temporarily pub until mod is 
deduplicated +#[cfg(all(feature = "asm", target_arch = "aarch64"))] +pub(crate) unsafe extern "C" fn ipred_z1_neon_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + topleft_in: *const DynPixel, + width: c_int, + height: c_int, + angle: c_int, + max_width: c_int, + max_height: c_int, + bitdepth_max: c_int, +) { + ipred_z1_neon( + dst.cast(), + stride, + topleft_in.cast(), + width, + height, + angle, + max_width, + max_height, + BD::from_c(bitdepth_max), + ); +} + +// TODO(kkysen) Temporarily pub until mod is deduplicated +#[cfg(all(feature = "asm", target_arch = "aarch64"))] +pub(crate) unsafe extern "C" fn ipred_z2_neon_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + topleft_in: *const DynPixel, + width: c_int, + height: c_int, + angle: c_int, + max_width: c_int, + max_height: c_int, + bitdepth_max: c_int, +) { + ipred_z2_neon( + dst.cast(), + stride, + topleft_in.cast(), + width, + height, + angle, + max_width, + max_height, + BD::from_c(bitdepth_max), + ); +} + +// TODO(kkysen) Temporarily pub until mod is deduplicated +#[cfg(all(feature = "asm", target_arch = "aarch64"))] +pub(crate) unsafe extern "C" fn ipred_z3_neon_erased( + dst: *mut DynPixel, + stride: ptrdiff_t, + topleft_in: *const DynPixel, + width: c_int, + height: c_int, + angle: c_int, + max_width: c_int, + max_height: c_int, + bitdepth_max: c_int, +) { + ipred_z3_neon( + dst.cast(), + stride, + topleft_in.cast(), + width, + height, + angle, + max_width, + max_height, + BD::from_c(bitdepth_max), + ); +} diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 081d3f2e8..3a9419910 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -38,15 +38,6 @@ use crate::src::levels::Z1_PRED; use crate::src::levels::Z2_PRED; use crate::src::levels::Z3_PRED; -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -use crate::{ - include::common::bitdepth::BitDepth, include::common::bitdepth::DynPixel, - src::ipred::ipred_z1_neon, src::ipred::ipred_z2_neon, src::ipred::ipred_z3_neon, -}; - 
-#[cfg(all(feature = "asm", target_arch = "aarch64"))] -use ::{libc::ptrdiff_t, std::ffi::c_int}; - #[cfg(feature = "asm")] use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; @@ -159,9 +150,9 @@ unsafe fn intra_pred_dsp_init_arm(c: *mut Rav1dIntraPredDSPContext) { (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(dav1d_ipred_smooth_h_16bpc_neon); #[cfg(target_arch = "aarch64")] { - (*c).intra_pred[Z1_PRED as usize] = Some(ipred_z1_neon_erased); - (*c).intra_pred[Z2_PRED as usize] = Some(ipred_z2_neon_erased); - (*c).intra_pred[Z3_PRED as usize] = Some(ipred_z3_neon_erased); + (*c).intra_pred[Z1_PRED as usize] = Some(ipred_z1_neon_erased::); + (*c).intra_pred[Z2_PRED as usize] = Some(ipred_z2_neon_erased::); + (*c).intra_pred[Z3_PRED as usize] = Some(ipred_z3_neon_erased::); } (*c).intra_pred[FILTER_PRED as usize] = Some(dav1d_ipred_filter_16bpc_neon); @@ -177,81 +168,6 @@ unsafe fn intra_pred_dsp_init_arm(c: *mut Rav1dIntraPredDSPContext) { (*c).pal_pred = dav1d_pal_pred_16bpc_neon; } -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -unsafe extern "C" fn ipred_z3_neon_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft_in: *const DynPixel, - width: c_int, - height: c_int, - angle: c_int, - max_width: c_int, - max_height: c_int, - bitdepth_max: c_int, -) { - ipred_z3_neon( - dst.cast(), - stride, - topleft_in.cast(), - width, - height, - angle, - max_width, - max_height, - BitDepth16::from_c(bitdepth_max), - ); -} - -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -unsafe extern "C" fn ipred_z2_neon_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft_in: *const DynPixel, - width: c_int, - height: c_int, - angle: c_int, - max_width: c_int, - max_height: c_int, - bitdepth_max: c_int, -) { - ipred_z2_neon( - dst.cast(), - stride, - topleft_in.cast(), - width, - height, - angle, - max_width, - max_height, - BitDepth16::from_c(bitdepth_max), - ); -} - -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -unsafe extern "C" fn 
ipred_z1_neon_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft_in: *const DynPixel, - width: c_int, - height: c_int, - angle: c_int, - max_width: c_int, - max_height: c_int, - bitdepth_max: c_int, -) { - ipred_z1_neon( - dst.cast(), - stride, - topleft_in.cast(), - width, - height, - angle, - max_width, - max_height, - BitDepth16::from_c(bitdepth_max), - ); -} - #[cold] pub unsafe fn rav1d_intra_pred_dsp_init_16bpc(c: *mut Rav1dIntraPredDSPContext) { (*c).intra_pred[DC_PRED as usize] = Some(ipred_dc_c_erased::); diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index d355c7832..8c3f03bdc 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -38,15 +38,6 @@ use crate::src::levels::Z1_PRED; use crate::src::levels::Z2_PRED; use crate::src::levels::Z3_PRED; -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -use crate::{ - include::common::bitdepth::BitDepth, include::common::bitdepth::DynPixel, - src::ipred::ipred_z1_neon, src::ipred::ipred_z2_neon, src::ipred::ipred_z3_neon, -}; - -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -use ::{libc::ptrdiff_t, std::ffi::c_int}; - #[cfg(feature = "asm")] use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; @@ -166,9 +157,9 @@ unsafe fn intra_pred_dsp_init_arm(c: *mut Rav1dIntraPredDSPContext) { (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(dav1d_ipred_smooth_h_8bpc_neon); #[cfg(target_arch = "aarch64")] { - (*c).intra_pred[Z1_PRED as usize] = Some(ipred_z1_neon_erased); - (*c).intra_pred[Z2_PRED as usize] = Some(ipred_z2_neon_erased); - (*c).intra_pred[Z3_PRED as usize] = Some(ipred_z3_neon_erased); + (*c).intra_pred[Z1_PRED as usize] = Some(ipred_z1_neon_erased::); + (*c).intra_pred[Z2_PRED as usize] = Some(ipred_z2_neon_erased::); + (*c).intra_pred[Z3_PRED as usize] = Some(ipred_z3_neon_erased::); } (*c).intra_pred[FILTER_PRED as usize] = Some(dav1d_ipred_filter_8bpc_neon); @@ -184,81 +175,6 @@ unsafe fn intra_pred_dsp_init_arm(c: *mut Rav1dIntraPredDSPContext) { (*c).pal_pred = 
dav1d_pal_pred_8bpc_neon; } -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -unsafe extern "C" fn ipred_z3_neon_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft_in: *const DynPixel, - width: c_int, - height: c_int, - angle: c_int, - max_width: c_int, - max_height: c_int, - _bitdepth_max: c_int, -) { - ipred_z3_neon( - dst.cast(), - stride, - topleft_in.cast(), - width, - height, - angle, - max_width, - max_height, - BitDepth8::new(()), - ); -} - -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -unsafe extern "C" fn ipred_z2_neon_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft_in: *const DynPixel, - width: c_int, - height: c_int, - angle: c_int, - max_width: c_int, - max_height: c_int, - _bitdepth_max: c_int, -) { - ipred_z2_neon( - dst.cast(), - stride, - topleft_in.cast(), - width, - height, - angle, - max_width, - max_height, - BitDepth8::new(()), - ); -} - -#[cfg(all(feature = "asm", target_arch = "aarch64"))] -unsafe extern "C" fn ipred_z1_neon_erased( - dst: *mut DynPixel, - stride: ptrdiff_t, - topleft_in: *const DynPixel, - width: c_int, - height: c_int, - angle: c_int, - max_width: c_int, - max_height: c_int, - bitdepth_max: c_int, -) { - ipred_z1_neon( - dst.cast(), - stride, - topleft_in.cast(), - width, - height, - angle, - max_width, - max_height, - BitDepth8::from_c(bitdepth_max), - ); -} - #[cold] pub unsafe fn rav1d_intra_pred_dsp_init_8bpc(c: *mut Rav1dIntraPredDSPContext) { (*c).intra_pred[DC_PRED as usize] = Some(ipred_dc_c_erased::); From d9f2c2de81b08e25c1bcad5b221517d0caff8652 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Sat, 18 Nov 2023 21:12:08 -0800 Subject: [PATCH 47/49] `fn intra_pred_dsp_init_x86`: Deduplicate w/ generics. 
--- src/ipred.rs | 113 ++++++++++++++++++++++++++++++++++++++++++- src/ipred_tmpl_16.rs | 89 ++-------------------------------- src/ipred_tmpl_8.rs | 95 ++---------------------------------- 3 files changed, 120 insertions(+), 177 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index b16ea350b..5c845cf08 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -19,9 +19,29 @@ use std::ffi::c_ulonglong; use std::ffi::c_void; use std::slice; -#[cfg(all(feature = "asm", target_arch = "aarch64"))] +#[cfg(feature = "asm")] use crate::include::common::bitdepth::bd_fn; +#[cfg(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64")))] +use crate::{ + include::dav1d::headers::Rav1dPixelLayout, + src::cpu::{rav1d_get_cpu_flags, CpuFlags}, + src::levels::DC_128_PRED, + src::levels::DC_PRED, + src::levels::FILTER_PRED, + src::levels::HOR_PRED, + src::levels::LEFT_DC_PRED, + src::levels::PAETH_PRED, + src::levels::SMOOTH_H_PRED, + src::levels::SMOOTH_PRED, + src::levels::SMOOTH_V_PRED, + src::levels::TOP_DC_PRED, + src::levels::VERT_PRED, + src::levels::Z1_PRED, + src::levels::Z2_PRED, + src::levels::Z3_PRED, +}; + #[cfg(all(feature = "asm", target_arch = "aarch64"))] use ::to_method::To; @@ -2335,3 +2355,94 @@ pub(crate) unsafe extern "C" fn ipred_z3_neon_erased( BD::from_c(bitdepth_max), ); } + +// TODO(kkysen) Temporarily pub until mod is deduplicated +#[cfg(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64"),))] +#[inline(always)] +pub(crate) unsafe fn intra_pred_dsp_init_x86(c: *mut Rav1dIntraPredDSPContext) { + let flags = rav1d_get_cpu_flags(); + + if !flags.contains(CpuFlags::SSSE3) { + return; + } + + (*c).intra_pred[DC_PRED as usize] = Some(bd_fn!(BD, ipred_dc, ssse3)); + (*c).intra_pred[DC_128_PRED as usize] = Some(bd_fn!(BD, ipred_dc_128, ssse3)); + (*c).intra_pred[TOP_DC_PRED as usize] = Some(bd_fn!(BD, ipred_dc_top, ssse3)); + (*c).intra_pred[LEFT_DC_PRED as usize] = Some(bd_fn!(BD, ipred_dc_left, ssse3)); + (*c).intra_pred[HOR_PRED as 
usize] = Some(bd_fn!(BD, ipred_h, ssse3)); + (*c).intra_pred[VERT_PRED as usize] = Some(bd_fn!(BD, ipred_v, ssse3)); + (*c).intra_pred[PAETH_PRED as usize] = Some(bd_fn!(BD, ipred_paeth, ssse3)); + (*c).intra_pred[SMOOTH_PRED as usize] = Some(bd_fn!(BD, ipred_smooth, ssse3)); + (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(bd_fn!(BD, ipred_smooth_h, ssse3)); + (*c).intra_pred[SMOOTH_V_PRED as usize] = Some(bd_fn!(BD, ipred_smooth_v, ssse3)); + (*c).intra_pred[Z1_PRED as usize] = Some(bd_fn!(BD, ipred_z1, ssse3)); + (*c).intra_pred[Z2_PRED as usize] = Some(bd_fn!(BD, ipred_z2, ssse3)); + (*c).intra_pred[Z3_PRED as usize] = Some(bd_fn!(BD, ipred_z3, ssse3)); + (*c).intra_pred[FILTER_PRED as usize] = Some(bd_fn!(BD, ipred_filter, ssse3)); + + (*c).cfl_pred[DC_PRED as usize] = bd_fn!(BD, ipred_cfl, ssse3); + (*c).cfl_pred[DC_128_PRED as usize] = bd_fn!(BD, ipred_cfl_128, ssse3); + (*c).cfl_pred[TOP_DC_PRED as usize] = bd_fn!(BD, ipred_cfl_top, ssse3); + (*c).cfl_pred[LEFT_DC_PRED as usize] = bd_fn!(BD, ipred_cfl_left, ssse3); + + (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = bd_fn!(BD, ipred_cfl_ac_420, ssse3); + (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = bd_fn!(BD, ipred_cfl_ac_422, ssse3); + (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = bd_fn!(BD, ipred_cfl_ac_444, ssse3); + + (*c).pal_pred = bd_fn!(BD, pal_pred, ssse3); + + #[cfg(target_arch = "x86_64")] + { + if !flags.contains(CpuFlags::AVX2) { + return; + } + + (*c).intra_pred[DC_PRED as usize] = Some(bd_fn!(BD, ipred_dc, avx2)); + (*c).intra_pred[DC_128_PRED as usize] = Some(bd_fn!(BD, ipred_dc_128, avx2)); + (*c).intra_pred[TOP_DC_PRED as usize] = Some(bd_fn!(BD, ipred_dc_top, avx2)); + (*c).intra_pred[LEFT_DC_PRED as usize] = Some(bd_fn!(BD, ipred_dc_left, avx2)); + (*c).intra_pred[HOR_PRED as usize] = Some(bd_fn!(BD, ipred_h, avx2)); + (*c).intra_pred[VERT_PRED as usize] = Some(bd_fn!(BD, ipred_v, avx2)); + (*c).intra_pred[PAETH_PRED as usize] = Some(bd_fn!(BD, ipred_paeth, avx2)); + 
(*c).intra_pred[SMOOTH_PRED as usize] = Some(bd_fn!(BD, ipred_smooth, avx2)); + (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(bd_fn!(BD, ipred_smooth_h, avx2)); + (*c).intra_pred[SMOOTH_V_PRED as usize] = Some(bd_fn!(BD, ipred_smooth_v, avx2)); + (*c).intra_pred[Z1_PRED as usize] = Some(bd_fn!(BD, ipred_z1, avx2)); + (*c).intra_pred[Z2_PRED as usize] = Some(bd_fn!(BD, ipred_z2, avx2)); + (*c).intra_pred[Z3_PRED as usize] = Some(bd_fn!(BD, ipred_z3, avx2)); + (*c).intra_pred[FILTER_PRED as usize] = Some(bd_fn!(BD, ipred_filter, avx2)); + + (*c).cfl_pred[DC_PRED as usize] = bd_fn!(BD, ipred_cfl, avx2); + (*c).cfl_pred[DC_128_PRED as usize] = bd_fn!(BD, ipred_cfl_128, avx2); + (*c).cfl_pred[TOP_DC_PRED as usize] = bd_fn!(BD, ipred_cfl_top, avx2); + (*c).cfl_pred[LEFT_DC_PRED as usize] = bd_fn!(BD, ipred_cfl_left, avx2); + + (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = bd_fn!(BD, ipred_cfl_ac_420, avx2); + (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = bd_fn!(BD, ipred_cfl_ac_422, avx2); + (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = bd_fn!(BD, ipred_cfl_ac_444, avx2); + + (*c).pal_pred = bd_fn!(BD, pal_pred, avx2); + + if !flags.contains(CpuFlags::AVX512ICL) { + return; + } + + if BD::BPC == BPC::BPC8 { + (*c).intra_pred[DC_PRED as usize] = Some(dav1d_ipred_dc_8bpc_avx512icl); + (*c).intra_pred[DC_128_PRED as usize] = Some(dav1d_ipred_dc_128_8bpc_avx512icl); + (*c).intra_pred[TOP_DC_PRED as usize] = Some(dav1d_ipred_dc_top_8bpc_avx512icl); + (*c).intra_pred[LEFT_DC_PRED as usize] = Some(dav1d_ipred_dc_left_8bpc_avx512icl); + (*c).intra_pred[HOR_PRED as usize] = Some(dav1d_ipred_h_8bpc_avx512icl); + (*c).intra_pred[VERT_PRED as usize] = Some(dav1d_ipred_v_8bpc_avx512icl); + } + + (*c).intra_pred[PAETH_PRED as usize] = Some(bd_fn!(BD, ipred_paeth, avx512icl)); + (*c).intra_pred[SMOOTH_PRED as usize] = Some(bd_fn!(BD, ipred_smooth, avx512icl)); + (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(bd_fn!(BD, ipred_smooth_h, avx512icl)); + 
(*c).intra_pred[SMOOTH_V_PRED as usize] = Some(bd_fn!(BD, ipred_smooth_v, avx512icl)); + (*c).intra_pred[FILTER_PRED as usize] = Some(bd_fn!(BD, ipred_filter, avx512icl)); + + (*c).pal_pred = bd_fn!(BD, pal_pred, avx512icl); + } +} diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 3a9419910..8b4d8a096 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -38,95 +38,12 @@ use crate::src::levels::Z1_PRED; use crate::src::levels::Z2_PRED; use crate::src::levels::Z3_PRED; -#[cfg(feature = "asm")] +#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; #[cfg(feature = "asm")] use cfg_if::cfg_if; -#[cfg(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64"),))] -#[inline(always)] -unsafe fn intra_pred_dsp_init_x86(c: *mut Rav1dIntraPredDSPContext) { - use crate::src::ipred::*; // TODO(legare): Temporary import until init fns are deduplicated. - - let flags = rav1d_get_cpu_flags(); - - if !flags.contains(CpuFlags::SSSE3) { - return; - } - - (*c).intra_pred[DC_PRED as usize] = Some(dav1d_ipred_dc_16bpc_ssse3); - (*c).intra_pred[DC_128_PRED as usize] = Some(dav1d_ipred_dc_128_16bpc_ssse3); - (*c).intra_pred[TOP_DC_PRED as usize] = Some(dav1d_ipred_dc_top_16bpc_ssse3); - (*c).intra_pred[LEFT_DC_PRED as usize] = Some(dav1d_ipred_dc_left_16bpc_ssse3); - (*c).intra_pred[HOR_PRED as usize] = Some(dav1d_ipred_h_16bpc_ssse3); - (*c).intra_pred[VERT_PRED as usize] = Some(dav1d_ipred_v_16bpc_ssse3); - (*c).intra_pred[PAETH_PRED as usize] = Some(dav1d_ipred_paeth_16bpc_ssse3); - (*c).intra_pred[SMOOTH_PRED as usize] = Some(dav1d_ipred_smooth_16bpc_ssse3); - (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(dav1d_ipred_smooth_h_16bpc_ssse3); - (*c).intra_pred[SMOOTH_V_PRED as usize] = Some(dav1d_ipred_smooth_v_16bpc_ssse3); - (*c).intra_pred[Z1_PRED as usize] = Some(dav1d_ipred_z1_16bpc_ssse3); - (*c).intra_pred[Z2_PRED as usize] = Some(dav1d_ipred_z2_16bpc_ssse3); - 
(*c).intra_pred[Z3_PRED as usize] = Some(dav1d_ipred_z3_16bpc_ssse3); - (*c).intra_pred[FILTER_PRED as usize] = Some(dav1d_ipred_filter_16bpc_ssse3); - - (*c).cfl_pred[DC_PRED as usize] = dav1d_ipred_cfl_16bpc_ssse3; - (*c).cfl_pred[DC_128_PRED as usize] = dav1d_ipred_cfl_128_16bpc_ssse3; - (*c).cfl_pred[TOP_DC_PRED as usize] = dav1d_ipred_cfl_top_16bpc_ssse3; - (*c).cfl_pred[LEFT_DC_PRED as usize] = dav1d_ipred_cfl_left_16bpc_ssse3; - - (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = dav1d_ipred_cfl_ac_420_16bpc_ssse3; - (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = dav1d_ipred_cfl_ac_422_16bpc_ssse3; - (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = dav1d_ipred_cfl_ac_444_16bpc_ssse3; - - (*c).pal_pred = dav1d_pal_pred_16bpc_ssse3; - - #[cfg(target_arch = "x86_64")] - { - if !flags.contains(CpuFlags::AVX2) { - return; - } - - (*c).intra_pred[DC_PRED as usize] = Some(dav1d_ipred_dc_16bpc_avx2); - (*c).intra_pred[DC_128_PRED as usize] = Some(dav1d_ipred_dc_128_16bpc_avx2); - (*c).intra_pred[TOP_DC_PRED as usize] = Some(dav1d_ipred_dc_top_16bpc_avx2); - (*c).intra_pred[LEFT_DC_PRED as usize] = Some(dav1d_ipred_dc_left_16bpc_avx2); - (*c).intra_pred[HOR_PRED as usize] = Some(dav1d_ipred_h_16bpc_avx2); - (*c).intra_pred[VERT_PRED as usize] = Some(dav1d_ipred_v_16bpc_avx2); - (*c).intra_pred[PAETH_PRED as usize] = Some(dav1d_ipred_paeth_16bpc_avx2); - (*c).intra_pred[SMOOTH_PRED as usize] = Some(dav1d_ipred_smooth_16bpc_avx2); - (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(dav1d_ipred_smooth_h_16bpc_avx2); - (*c).intra_pred[SMOOTH_V_PRED as usize] = Some(dav1d_ipred_smooth_v_16bpc_avx2); - (*c).intra_pred[Z1_PRED as usize] = Some(dav1d_ipred_z1_16bpc_avx2); - (*c).intra_pred[Z2_PRED as usize] = Some(dav1d_ipred_z2_16bpc_avx2); - (*c).intra_pred[Z3_PRED as usize] = Some(dav1d_ipred_z3_16bpc_avx2); - (*c).intra_pred[FILTER_PRED as usize] = Some(dav1d_ipred_filter_16bpc_avx2); - - (*c).cfl_pred[DC_PRED as usize] = dav1d_ipred_cfl_16bpc_avx2; - 
(*c).cfl_pred[DC_128_PRED as usize] = dav1d_ipred_cfl_128_16bpc_avx2; - (*c).cfl_pred[TOP_DC_PRED as usize] = dav1d_ipred_cfl_top_16bpc_avx2; - (*c).cfl_pred[LEFT_DC_PRED as usize] = dav1d_ipred_cfl_left_16bpc_avx2; - - (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = dav1d_ipred_cfl_ac_420_16bpc_avx2; - (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = dav1d_ipred_cfl_ac_422_16bpc_avx2; - (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = dav1d_ipred_cfl_ac_444_16bpc_avx2; - - (*c).pal_pred = dav1d_pal_pred_16bpc_avx2; - - if !flags.contains(CpuFlags::AVX512ICL) { - return; - } - - (*c).intra_pred[PAETH_PRED as usize] = Some(dav1d_ipred_paeth_16bpc_avx512icl); - (*c).intra_pred[SMOOTH_PRED as usize] = Some(dav1d_ipred_smooth_16bpc_avx512icl); - (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(dav1d_ipred_smooth_h_16bpc_avx512icl); - (*c).intra_pred[SMOOTH_V_PRED as usize] = Some(dav1d_ipred_smooth_v_16bpc_avx512icl); - (*c).intra_pred[FILTER_PRED as usize] = Some(dav1d_ipred_filter_16bpc_avx512icl); - - (*c).pal_pred = dav1d_pal_pred_16bpc_avx512icl; - } -} - #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64"),))] #[inline(always)] unsafe fn intra_pred_dsp_init_arm(c: *mut Rav1dIntraPredDSPContext) { @@ -199,7 +116,9 @@ pub unsafe fn rav1d_intra_pred_dsp_init_16bpc(c: *mut Rav1dIntraPredDSPContext) #[cfg(feature = "asm")] cfg_if! 
{ if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - intra_pred_dsp_init_x86(c); + use crate::src::ipred::intra_pred_dsp_init_x86; + + intra_pred_dsp_init_x86::(c); } else if #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] { intra_pred_dsp_init_arm(c); } diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index 8c3f03bdc..f029e5003 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -38,101 +38,12 @@ use crate::src::levels::Z1_PRED; use crate::src::levels::Z2_PRED; use crate::src::levels::Z3_PRED; -#[cfg(feature = "asm")] +#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; #[cfg(feature = "asm")] use cfg_if::cfg_if; -#[cfg(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64"),))] -#[inline(always)] -unsafe fn intra_pred_dsp_init_x86(c: *mut Rav1dIntraPredDSPContext) { - use crate::src::ipred::*; // TODO(legare): Temporary import until init fns are deduplicated. 
- - let flags = rav1d_get_cpu_flags(); - - if !flags.contains(CpuFlags::SSSE3) { - return; - } - - (*c).intra_pred[DC_PRED as usize] = Some(dav1d_ipred_dc_8bpc_ssse3); - (*c).intra_pred[DC_128_PRED as usize] = Some(dav1d_ipred_dc_128_8bpc_ssse3); - (*c).intra_pred[TOP_DC_PRED as usize] = Some(dav1d_ipred_dc_top_8bpc_ssse3); - (*c).intra_pred[LEFT_DC_PRED as usize] = Some(dav1d_ipred_dc_left_8bpc_ssse3); - (*c).intra_pred[HOR_PRED as usize] = Some(dav1d_ipred_h_8bpc_ssse3); - (*c).intra_pred[VERT_PRED as usize] = Some(dav1d_ipred_v_8bpc_ssse3); - (*c).intra_pred[PAETH_PRED as usize] = Some(dav1d_ipred_paeth_8bpc_ssse3); - (*c).intra_pred[SMOOTH_PRED as usize] = Some(dav1d_ipred_smooth_8bpc_ssse3); - (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(dav1d_ipred_smooth_h_8bpc_ssse3); - (*c).intra_pred[SMOOTH_V_PRED as usize] = Some(dav1d_ipred_smooth_v_8bpc_ssse3); - (*c).intra_pred[Z1_PRED as usize] = Some(dav1d_ipred_z1_8bpc_ssse3); - (*c).intra_pred[Z2_PRED as usize] = Some(dav1d_ipred_z2_8bpc_ssse3); - (*c).intra_pred[Z3_PRED as usize] = Some(dav1d_ipred_z3_8bpc_ssse3); - (*c).intra_pred[FILTER_PRED as usize] = Some(dav1d_ipred_filter_8bpc_ssse3); - - (*c).cfl_pred[DC_PRED as usize] = dav1d_ipred_cfl_8bpc_ssse3; - (*c).cfl_pred[DC_128_PRED as usize] = dav1d_ipred_cfl_128_8bpc_ssse3; - (*c).cfl_pred[TOP_DC_PRED as usize] = dav1d_ipred_cfl_top_8bpc_ssse3; - (*c).cfl_pred[LEFT_DC_PRED as usize] = dav1d_ipred_cfl_left_8bpc_ssse3; - - (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = dav1d_ipred_cfl_ac_420_8bpc_ssse3; - (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = dav1d_ipred_cfl_ac_422_8bpc_ssse3; - (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = dav1d_ipred_cfl_ac_444_8bpc_ssse3; - - (*c).pal_pred = dav1d_pal_pred_8bpc_ssse3; - - #[cfg(target_arch = "x86_64")] - { - if !flags.contains(CpuFlags::AVX2) { - return; - } - - (*c).intra_pred[DC_PRED as usize] = Some(dav1d_ipred_dc_8bpc_avx2); - (*c).intra_pred[DC_128_PRED as usize] = 
Some(dav1d_ipred_dc_128_8bpc_avx2); - (*c).intra_pred[TOP_DC_PRED as usize] = Some(dav1d_ipred_dc_top_8bpc_avx2); - (*c).intra_pred[LEFT_DC_PRED as usize] = Some(dav1d_ipred_dc_left_8bpc_avx2); - (*c).intra_pred[HOR_PRED as usize] = Some(dav1d_ipred_h_8bpc_avx2); - (*c).intra_pred[VERT_PRED as usize] = Some(dav1d_ipred_v_8bpc_avx2); - (*c).intra_pred[PAETH_PRED as usize] = Some(dav1d_ipred_paeth_8bpc_avx2); - (*c).intra_pred[SMOOTH_PRED as usize] = Some(dav1d_ipred_smooth_8bpc_avx2); - (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(dav1d_ipred_smooth_h_8bpc_avx2); - (*c).intra_pred[SMOOTH_V_PRED as usize] = Some(dav1d_ipred_smooth_v_8bpc_avx2); - (*c).intra_pred[Z1_PRED as usize] = Some(dav1d_ipred_z1_8bpc_avx2); - (*c).intra_pred[Z2_PRED as usize] = Some(dav1d_ipred_z2_8bpc_avx2); - (*c).intra_pred[Z3_PRED as usize] = Some(dav1d_ipred_z3_8bpc_avx2); - (*c).intra_pred[FILTER_PRED as usize] = Some(dav1d_ipred_filter_8bpc_avx2); - - (*c).cfl_pred[DC_PRED as usize] = dav1d_ipred_cfl_8bpc_avx2; - (*c).cfl_pred[DC_128_PRED as usize] = dav1d_ipred_cfl_128_8bpc_avx2; - (*c).cfl_pred[TOP_DC_PRED as usize] = dav1d_ipred_cfl_top_8bpc_avx2; - (*c).cfl_pred[LEFT_DC_PRED as usize] = dav1d_ipred_cfl_left_8bpc_avx2; - - (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = dav1d_ipred_cfl_ac_420_8bpc_avx2; - (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = dav1d_ipred_cfl_ac_422_8bpc_avx2; - (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = dav1d_ipred_cfl_ac_444_8bpc_avx2; - - (*c).pal_pred = dav1d_pal_pred_8bpc_avx2; - - if !flags.contains(CpuFlags::AVX512ICL) { - return; - } - - (*c).intra_pred[DC_PRED as usize] = Some(dav1d_ipred_dc_8bpc_avx512icl); - (*c).intra_pred[DC_128_PRED as usize] = Some(dav1d_ipred_dc_128_8bpc_avx512icl); - (*c).intra_pred[TOP_DC_PRED as usize] = Some(dav1d_ipred_dc_top_8bpc_avx512icl); - (*c).intra_pred[LEFT_DC_PRED as usize] = Some(dav1d_ipred_dc_left_8bpc_avx512icl); - (*c).intra_pred[HOR_PRED as usize] = Some(dav1d_ipred_h_8bpc_avx512icl); - 
(*c).intra_pred[VERT_PRED as usize] = Some(dav1d_ipred_v_8bpc_avx512icl); - (*c).intra_pred[PAETH_PRED as usize] = Some(dav1d_ipred_paeth_8bpc_avx512icl); - (*c).intra_pred[SMOOTH_PRED as usize] = Some(dav1d_ipred_smooth_8bpc_avx512icl); - (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(dav1d_ipred_smooth_h_8bpc_avx512icl); - (*c).intra_pred[SMOOTH_V_PRED as usize] = Some(dav1d_ipred_smooth_v_8bpc_avx512icl); - (*c).intra_pred[FILTER_PRED as usize] = Some(dav1d_ipred_filter_8bpc_avx512icl); - - (*c).pal_pred = dav1d_pal_pred_8bpc_avx512icl; - } -} - #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64"),))] #[inline(always)] unsafe fn intra_pred_dsp_init_arm(c: *mut Rav1dIntraPredDSPContext) { @@ -205,7 +116,9 @@ pub unsafe fn rav1d_intra_pred_dsp_init_8bpc(c: *mut Rav1dIntraPredDSPContext) { #[cfg(feature = "asm")] cfg_if! { if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - intra_pred_dsp_init_x86(c); + use crate::src::ipred::intra_pred_dsp_init_x86; + + intra_pred_dsp_init_x86::(c); } else if #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] { intra_pred_dsp_init_arm(c); } From 3e460f182533c7a11845897d5af0cca1132a5654 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Sat, 18 Nov 2023 21:43:33 -0800 Subject: [PATCH 48/49] `fn intra_pred_dsp_init_arm`: Deduplicate w/ generics. 
--- src/ipred.rs | 129 ++++++++++++++++++++++++++----------------- src/ipred_tmpl_16.rs | 48 +--------------- src/ipred_tmpl_8.rs | 49 +--------------- 3 files changed, 83 insertions(+), 143 deletions(-) diff --git a/src/ipred.rs b/src/ipred.rs index 5c845cf08..b38fc5f74 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -22,7 +22,7 @@ use std::slice; #[cfg(feature = "asm")] use crate::include::common::bitdepth::bd_fn; -#[cfg(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64")))] +#[cfg(feature = "asm")] use crate::{ include::dav1d::headers::Rav1dPixelLayout, src::cpu::{rav1d_get_cpu_flags, CpuFlags}, @@ -90,11 +90,10 @@ pub struct Rav1dIntraPredDSPContext { pub pal_pred: pal_pred_fn, } -// TODO(legare): Generated fns are temporarily pub until init fns are deduplicated. #[cfg(feature = "asm")] macro_rules! decl_fn { (angular_ipred, $name:ident) => { - pub(crate) fn $name( + fn $name( dst: *mut DynPixel, stride: ptrdiff_t, topleft: *const DynPixel, @@ -108,7 +107,7 @@ macro_rules! decl_fn { }; (cfl_pred, $name:ident) => { - pub(crate) fn $name( + fn $name( dst: *mut DynPixel, stride: ptrdiff_t, topleft: *const DynPixel, @@ -121,7 +120,7 @@ macro_rules! decl_fn { }; (cfl_ac, $name:ident) => { - pub(crate) fn $name( + fn $name( ac: *mut i16, y: *const DynPixel, stride: ptrdiff_t, @@ -133,7 +132,7 @@ macro_rules! 
decl_fn { }; (pal_pred, $name:ident) => { - pub(crate) fn $name( + fn $name( dst: *mut DynPixel, stride: ptrdiff_t, pal: *const u16, @@ -200,10 +199,9 @@ extern "C" { decl_fns!(pal_pred, pal_pred); } -// TODO(kkysen) Temporarily pub until mod is deduplicated #[cfg(all(feature = "bitdepth_8", feature = "asm", target_arch = "aarch64"))] extern "C" { - pub(crate) fn dav1d_ipred_z1_fill2_8bpc_neon( + fn dav1d_ipred_z1_fill2_8bpc_neon( dst: *mut DynPixel, stride: ptrdiff_t, top: *const DynPixel, @@ -212,7 +210,7 @@ extern "C" { dx: c_int, max_base_x: c_int, ); - pub(crate) fn dav1d_ipred_z1_fill1_8bpc_neon( + fn dav1d_ipred_z1_fill1_8bpc_neon( dst: *mut DynPixel, stride: ptrdiff_t, top: *const DynPixel, @@ -221,21 +219,21 @@ extern "C" { dx: c_int, max_base_x: c_int, ); - pub(crate) fn dav1d_ipred_z1_upsample_edge_8bpc_neon( + fn dav1d_ipred_z1_upsample_edge_8bpc_neon( out: *mut DynPixel, hsz: c_int, in_0: *const DynPixel, end: c_int, _bitdepth_max: c_int, ); - pub(crate) fn dav1d_ipred_z1_filter_edge_8bpc_neon( + fn dav1d_ipred_z1_filter_edge_8bpc_neon( out: *mut DynPixel, sz: c_int, in_0: *const DynPixel, end: c_int, strength: c_int, ); - pub(crate) fn dav1d_ipred_z2_fill3_8bpc_neon( + fn dav1d_ipred_z2_fill3_8bpc_neon( dst: *mut DynPixel, stride: ptrdiff_t, top: *const DynPixel, @@ -245,7 +243,7 @@ extern "C" { dx: c_int, dy: c_int, ); - pub(crate) fn dav1d_ipred_z2_fill2_8bpc_neon( + fn dav1d_ipred_z2_fill2_8bpc_neon( dst: *mut DynPixel, stride: ptrdiff_t, top: *const DynPixel, @@ -255,7 +253,7 @@ extern "C" { dx: c_int, dy: c_int, ); - pub(crate) fn dav1d_ipred_z2_fill1_8bpc_neon( + fn dav1d_ipred_z2_fill1_8bpc_neon( dst: *mut DynPixel, stride: ptrdiff_t, top: *const DynPixel, @@ -265,14 +263,14 @@ extern "C" { dx: c_int, dy: c_int, ); - pub(crate) fn dav1d_ipred_z2_upsample_edge_8bpc_neon( + fn dav1d_ipred_z2_upsample_edge_8bpc_neon( out: *mut DynPixel, hsz: c_int, in_0: *const DynPixel, _bitdepth_max: c_int, ); - pub(crate) fn dav1d_ipred_reverse_8bpc_neon(dst: 
*mut DynPixel, src: *const DynPixel, n: c_int); - pub(crate) fn dav1d_ipred_z3_fill2_8bpc_neon( + fn dav1d_ipred_reverse_8bpc_neon(dst: *mut DynPixel, src: *const DynPixel, n: c_int); + fn dav1d_ipred_z3_fill2_8bpc_neon( dst: *mut DynPixel, stride: ptrdiff_t, left: *const DynPixel, @@ -281,7 +279,7 @@ extern "C" { dy: c_int, max_base_y: c_int, ); - pub(crate) fn dav1d_ipred_z3_fill1_8bpc_neon( + fn dav1d_ipred_z3_fill1_8bpc_neon( dst: *mut DynPixel, stride: ptrdiff_t, left: *const DynPixel, @@ -290,17 +288,16 @@ extern "C" { dy: c_int, max_base_y: c_int, ); - pub(crate) fn dav1d_ipred_pixel_set_8bpc_neon( + fn dav1d_ipred_pixel_set_8bpc_neon( out: *mut DynPixel, px: ::Pixel, n: c_int, ); } -// TODO(kkysen) Temporarily pub until mod is deduplicated #[cfg(all(feature = "bitdepth_16", feature = "asm", target_arch = "aarch64"))] extern "C" { - pub(crate) fn dav1d_ipred_z1_fill2_16bpc_neon( + fn dav1d_ipred_z1_fill2_16bpc_neon( dst: *mut DynPixel, stride: ptrdiff_t, top: *const DynPixel, @@ -309,7 +306,7 @@ extern "C" { dx: c_int, max_base_x: c_int, ); - pub(crate) fn dav1d_ipred_z1_fill1_16bpc_neon( + fn dav1d_ipred_z1_fill1_16bpc_neon( dst: *mut DynPixel, stride: ptrdiff_t, top: *const DynPixel, @@ -318,21 +315,21 @@ extern "C" { dx: c_int, max_base_x: c_int, ); - pub(crate) fn dav1d_ipred_z1_upsample_edge_16bpc_neon( + fn dav1d_ipred_z1_upsample_edge_16bpc_neon( out: *mut DynPixel, hsz: c_int, in_0: *const DynPixel, end: c_int, bitdepth_max: c_int, ); - pub(crate) fn dav1d_ipred_z1_filter_edge_16bpc_neon( + fn dav1d_ipred_z1_filter_edge_16bpc_neon( out: *mut DynPixel, sz: c_int, in_0: *const DynPixel, end: c_int, strength: c_int, ); - pub(crate) fn dav1d_ipred_z2_fill3_16bpc_neon( + fn dav1d_ipred_z2_fill3_16bpc_neon( dst: *mut DynPixel, stride: ptrdiff_t, top: *const DynPixel, @@ -342,7 +339,7 @@ extern "C" { dx: c_int, dy: c_int, ); - pub(crate) fn dav1d_ipred_z2_fill2_16bpc_neon( + fn dav1d_ipred_z2_fill2_16bpc_neon( dst: *mut DynPixel, stride: ptrdiff_t, top: 
*const DynPixel, @@ -352,7 +349,7 @@ extern "C" { dx: c_int, dy: c_int, ); - pub(crate) fn dav1d_ipred_z2_fill1_16bpc_neon( + fn dav1d_ipred_z2_fill1_16bpc_neon( dst: *mut DynPixel, stride: ptrdiff_t, top: *const DynPixel, @@ -362,18 +359,14 @@ extern "C" { dx: c_int, dy: c_int, ); - pub(crate) fn dav1d_ipred_z2_upsample_edge_16bpc_neon( + fn dav1d_ipred_z2_upsample_edge_16bpc_neon( out: *mut DynPixel, hsz: c_int, in_0: *const DynPixel, bitdepth_max: c_int, ); - pub(crate) fn dav1d_ipred_reverse_16bpc_neon( - dst: *mut DynPixel, - src: *const DynPixel, - n: c_int, - ); - pub(crate) fn dav1d_ipred_z3_fill2_16bpc_neon( + fn dav1d_ipred_reverse_16bpc_neon(dst: *mut DynPixel, src: *const DynPixel, n: c_int); + fn dav1d_ipred_z3_fill2_16bpc_neon( dst: *mut DynPixel, stride: ptrdiff_t, left: *const DynPixel, @@ -382,7 +375,7 @@ extern "C" { dy: c_int, max_base_y: c_int, ); - pub(crate) fn dav1d_ipred_z3_fill1_16bpc_neon( + fn dav1d_ipred_z3_fill1_16bpc_neon( dst: *mut DynPixel, stride: ptrdiff_t, left: *const DynPixel, @@ -391,7 +384,7 @@ extern "C" { dy: c_int, max_base_y: c_int, ); - pub(crate) fn dav1d_ipred_pixel_set_16bpc_neon( + fn dav1d_ipred_pixel_set_16bpc_neon( out: *mut DynPixel, px: ::Pixel, n: c_int, @@ -1032,9 +1025,8 @@ pub(crate) unsafe extern "C" fn ipred_smooth_h_c_erased( ); } -// TODO(kkysen) Temporarily pub until mod is deduplicated #[inline(never)] -pub(crate) unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { +unsafe fn get_filter_strength(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { if is_sm != 0 { if wh <= 8 { if angle >= 64 { @@ -1126,9 +1118,8 @@ unsafe fn filter_edge( } } -// TODO(kkysen) Temporarily pub until mod is deduplicated #[inline] -pub(crate) unsafe fn get_upsample(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { +unsafe fn get_upsample(wh: c_int, angle: c_int, is_sm: c_int) -> c_int { return (angle < 40 && wh <= 16 >> is_sm) as c_int; } @@ -1564,8 +1555,7 @@ pub(crate) unsafe extern "C" fn 
ipred_z3_c_erased( ); } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe fn filter_fn( +unsafe fn filter_fn( flt_ptr: *const i8, p0: c_int, p1: c_int, @@ -1596,11 +1586,9 @@ pub(crate) unsafe fn filter_fn( cfg_if! { if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - // TODO(kkysen) Temporarily pub until mod is deduplicated - pub(crate) const FLT_INCR: isize = 2; + const FLT_INCR: isize = 2; } else { - // TODO(kkysen) Temporarily pub until mod is deduplicated - pub(crate) const FLT_INCR: isize = 1; + const FLT_INCR: isize = 1; } } @@ -2278,9 +2266,8 @@ unsafe fn ipred_z3_neon( }; } -// TODO(kkysen) Temporarily pub until mod is deduplicated #[cfg(all(feature = "asm", target_arch = "aarch64"))] -pub(crate) unsafe extern "C" fn ipred_z1_neon_erased( +unsafe extern "C" fn ipred_z1_neon_erased( dst: *mut DynPixel, stride: ptrdiff_t, topleft_in: *const DynPixel, @@ -2304,9 +2291,8 @@ pub(crate) unsafe extern "C" fn ipred_z1_neon_erased( ); } -// TODO(kkysen) Temporarily pub until mod is deduplicated #[cfg(all(feature = "asm", target_arch = "aarch64"))] -pub(crate) unsafe extern "C" fn ipred_z2_neon_erased( +unsafe extern "C" fn ipred_z2_neon_erased( dst: *mut DynPixel, stride: ptrdiff_t, topleft_in: *const DynPixel, @@ -2330,9 +2316,8 @@ pub(crate) unsafe extern "C" fn ipred_z2_neon_erased( ); } -// TODO(kkysen) Temporarily pub until mod is deduplicated #[cfg(all(feature = "asm", target_arch = "aarch64"))] -pub(crate) unsafe extern "C" fn ipred_z3_neon_erased( +unsafe extern "C" fn ipred_z3_neon_erased( dst: *mut DynPixel, stride: ptrdiff_t, topleft_in: *const DynPixel, @@ -2446,3 +2431,43 @@ pub(crate) unsafe fn intra_pred_dsp_init_x86(c: *mut Rav1dIntraPre (*c).pal_pred = bd_fn!(BD, pal_pred, avx512icl); } } + +// TODO(kkysen) Temporarily pub until mod is deduplicated +#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64"),))] +#[inline(always)] +pub(crate) unsafe fn intra_pred_dsp_init_arm(c: *mut 
Rav1dIntraPredDSPContext) { + let flags = rav1d_get_cpu_flags(); + + if !flags.contains(CpuFlags::NEON) { + return; + } + + (*c).intra_pred[DC_PRED as usize] = Some(bd_fn!(BD, ipred_dc, neon)); + (*c).intra_pred[DC_128_PRED as usize] = Some(bd_fn!(BD, ipred_dc_128, neon)); + (*c).intra_pred[TOP_DC_PRED as usize] = Some(bd_fn!(BD, ipred_dc_top, neon)); + (*c).intra_pred[LEFT_DC_PRED as usize] = Some(bd_fn!(BD, ipred_dc_left, neon)); + (*c).intra_pred[HOR_PRED as usize] = Some(bd_fn!(BD, ipred_h, neon)); + (*c).intra_pred[VERT_PRED as usize] = Some(bd_fn!(BD, ipred_v, neon)); + (*c).intra_pred[PAETH_PRED as usize] = Some(bd_fn!(BD, ipred_paeth, neon)); + (*c).intra_pred[SMOOTH_PRED as usize] = Some(bd_fn!(BD, ipred_smooth, neon)); + (*c).intra_pred[SMOOTH_V_PRED as usize] = Some(bd_fn!(BD, ipred_smooth_v, neon)); + (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(bd_fn!(BD, ipred_smooth_h, neon)); + #[cfg(target_arch = "aarch64")] + { + (*c).intra_pred[Z1_PRED as usize] = Some(ipred_z1_neon_erased::); + (*c).intra_pred[Z2_PRED as usize] = Some(ipred_z2_neon_erased::); + (*c).intra_pred[Z3_PRED as usize] = Some(ipred_z3_neon_erased::); + } + (*c).intra_pred[FILTER_PRED as usize] = Some(bd_fn!(BD, ipred_filter, neon)); + + (*c).cfl_pred[DC_PRED as usize] = bd_fn!(BD, ipred_cfl, neon); + (*c).cfl_pred[DC_128_PRED as usize] = bd_fn!(BD, ipred_cfl_128, neon); + (*c).cfl_pred[TOP_DC_PRED as usize] = bd_fn!(BD, ipred_cfl_top, neon); + (*c).cfl_pred[LEFT_DC_PRED as usize] = bd_fn!(BD, ipred_cfl_left, neon); + + (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = bd_fn!(BD, ipred_cfl_ac_420, neon); + (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = bd_fn!(BD, ipred_cfl_ac_422, neon); + (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = bd_fn!(BD, ipred_cfl_ac_444, neon); + + (*c).pal_pred = bd_fn!(BD, pal_pred, neon); +} diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs index 8b4d8a096..a8e58d7d3 100644 --- a/src/ipred_tmpl_16.rs +++ b/src/ipred_tmpl_16.rs @@ -38,53 
+38,9 @@ use crate::src::levels::Z1_PRED; use crate::src::levels::Z2_PRED; use crate::src::levels::Z3_PRED; -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; - #[cfg(feature = "asm")] use cfg_if::cfg_if; -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64"),))] -#[inline(always)] -unsafe fn intra_pred_dsp_init_arm(c: *mut Rav1dIntraPredDSPContext) { - // TODO(legare): Temporary import until init fns are deduplicated. - use crate::src::ipred::*; - - let flags = rav1d_get_cpu_flags(); - if !flags.contains(CpuFlags::NEON) { - return; - } - - (*c).intra_pred[DC_PRED as usize] = Some(dav1d_ipred_dc_16bpc_neon); - (*c).intra_pred[DC_128_PRED as usize] = Some(dav1d_ipred_dc_128_16bpc_neon); - (*c).intra_pred[TOP_DC_PRED as usize] = Some(dav1d_ipred_dc_top_16bpc_neon); - (*c).intra_pred[LEFT_DC_PRED as usize] = Some(dav1d_ipred_dc_left_16bpc_neon); - (*c).intra_pred[HOR_PRED as usize] = Some(dav1d_ipred_h_16bpc_neon); - (*c).intra_pred[VERT_PRED as usize] = Some(dav1d_ipred_v_16bpc_neon); - (*c).intra_pred[PAETH_PRED as usize] = Some(dav1d_ipred_paeth_16bpc_neon); - (*c).intra_pred[SMOOTH_PRED as usize] = Some(dav1d_ipred_smooth_16bpc_neon); - (*c).intra_pred[SMOOTH_V_PRED as usize] = Some(dav1d_ipred_smooth_v_16bpc_neon); - (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(dav1d_ipred_smooth_h_16bpc_neon); - #[cfg(target_arch = "aarch64")] - { - (*c).intra_pred[Z1_PRED as usize] = Some(ipred_z1_neon_erased::); - (*c).intra_pred[Z2_PRED as usize] = Some(ipred_z2_neon_erased::); - (*c).intra_pred[Z3_PRED as usize] = Some(ipred_z3_neon_erased::); - } - (*c).intra_pred[FILTER_PRED as usize] = Some(dav1d_ipred_filter_16bpc_neon); - - (*c).cfl_pred[DC_PRED as usize] = dav1d_ipred_cfl_16bpc_neon; - (*c).cfl_pred[DC_128_PRED as usize] = dav1d_ipred_cfl_128_16bpc_neon; - (*c).cfl_pred[TOP_DC_PRED as usize] = dav1d_ipred_cfl_top_16bpc_neon; - (*c).cfl_pred[LEFT_DC_PRED as 
usize] = dav1d_ipred_cfl_left_16bpc_neon; - - (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = dav1d_ipred_cfl_ac_420_16bpc_neon; - (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = dav1d_ipred_cfl_ac_422_16bpc_neon; - (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = dav1d_ipred_cfl_ac_444_16bpc_neon; - - (*c).pal_pred = dav1d_pal_pred_16bpc_neon; -} - #[cold] pub unsafe fn rav1d_intra_pred_dsp_init_16bpc(c: *mut Rav1dIntraPredDSPContext) { (*c).intra_pred[DC_PRED as usize] = Some(ipred_dc_c_erased::); @@ -120,7 +76,9 @@ pub unsafe fn rav1d_intra_pred_dsp_init_16bpc(c: *mut Rav1dIntraPredDSPContext) intra_pred_dsp_init_x86::(c); } else if #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] { - intra_pred_dsp_init_arm(c); + use crate::src::ipred::intra_pred_dsp_init_arm; + + intra_pred_dsp_init_arm::(c); } } } diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs index f029e5003..d70090e58 100644 --- a/src/ipred_tmpl_8.rs +++ b/src/ipred_tmpl_8.rs @@ -38,54 +38,9 @@ use crate::src::levels::Z1_PRED; use crate::src::levels::Z2_PRED; use crate::src::levels::Z3_PRED; -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -use crate::src::cpu::{rav1d_get_cpu_flags, CpuFlags}; - #[cfg(feature = "asm")] use cfg_if::cfg_if; -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64"),))] -#[inline(always)] -unsafe fn intra_pred_dsp_init_arm(c: *mut Rav1dIntraPredDSPContext) { - // TODO(legare): Temporary import until init fns are deduplicated. 
- use crate::src::ipred::*; - - let flags = rav1d_get_cpu_flags(); - - if !flags.contains(CpuFlags::NEON) { - return; - } - - (*c).intra_pred[DC_PRED as usize] = Some(dav1d_ipred_dc_8bpc_neon); - (*c).intra_pred[DC_128_PRED as usize] = Some(dav1d_ipred_dc_128_8bpc_neon); - (*c).intra_pred[TOP_DC_PRED as usize] = Some(dav1d_ipred_dc_top_8bpc_neon); - (*c).intra_pred[LEFT_DC_PRED as usize] = Some(dav1d_ipred_dc_left_8bpc_neon); - (*c).intra_pred[HOR_PRED as usize] = Some(dav1d_ipred_h_8bpc_neon); - (*c).intra_pred[VERT_PRED as usize] = Some(dav1d_ipred_v_8bpc_neon); - (*c).intra_pred[PAETH_PRED as usize] = Some(dav1d_ipred_paeth_8bpc_neon); - (*c).intra_pred[SMOOTH_PRED as usize] = Some(dav1d_ipred_smooth_8bpc_neon); - (*c).intra_pred[SMOOTH_V_PRED as usize] = Some(dav1d_ipred_smooth_v_8bpc_neon); - (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(dav1d_ipred_smooth_h_8bpc_neon); - #[cfg(target_arch = "aarch64")] - { - (*c).intra_pred[Z1_PRED as usize] = Some(ipred_z1_neon_erased::); - (*c).intra_pred[Z2_PRED as usize] = Some(ipred_z2_neon_erased::); - (*c).intra_pred[Z3_PRED as usize] = Some(ipred_z3_neon_erased::); - } - (*c).intra_pred[FILTER_PRED as usize] = Some(dav1d_ipred_filter_8bpc_neon); - - (*c).cfl_pred[DC_PRED as usize] = dav1d_ipred_cfl_8bpc_neon; - (*c).cfl_pred[DC_128_PRED as usize] = dav1d_ipred_cfl_128_8bpc_neon; - (*c).cfl_pred[TOP_DC_PRED as usize] = dav1d_ipred_cfl_top_8bpc_neon; - (*c).cfl_pred[LEFT_DC_PRED as usize] = dav1d_ipred_cfl_left_8bpc_neon; - - (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = dav1d_ipred_cfl_ac_420_8bpc_neon; - (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = dav1d_ipred_cfl_ac_422_8bpc_neon; - (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = dav1d_ipred_cfl_ac_444_8bpc_neon; - - (*c).pal_pred = dav1d_pal_pred_8bpc_neon; -} - #[cold] pub unsafe fn rav1d_intra_pred_dsp_init_8bpc(c: *mut Rav1dIntraPredDSPContext) { (*c).intra_pred[DC_PRED as usize] = Some(ipred_dc_c_erased::); @@ -120,7 +75,9 @@ pub unsafe fn 
rav1d_intra_pred_dsp_init_8bpc(c: *mut Rav1dIntraPredDSPContext) { intra_pred_dsp_init_x86::(c); } else if #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] { - intra_pred_dsp_init_arm(c); + use crate::src::ipred::intra_pred_dsp_init_arm; + + intra_pred_dsp_init_arm::(c); } } } From e26e7b55764ef5f34e29b1f3be855f2f696d8aca Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Sat, 18 Nov 2023 21:51:52 -0800 Subject: [PATCH 49/49] `fn rav1d_intra_pred_dsp_init`: Deduplicate w/ generics. --- lib.rs | 4 -- src/decode.rs | 8 +-- src/ipred.rs | 151 +++++++++++++++++++++++-------------------- src/ipred_tmpl_16.rs | 84 ------------------------ src/ipred_tmpl_8.rs | 83 ------------------------ 5 files changed, 86 insertions(+), 244 deletions(-) delete mode 100644 src/ipred_tmpl_16.rs delete mode 100644 src/ipred_tmpl_8.rs diff --git a/lib.rs b/lib.rs index 12c889498..a8f64784b 100644 --- a/lib.rs +++ b/lib.rs @@ -59,10 +59,6 @@ pub mod src { mod intra_edge; mod ipred; mod ipred_prepare; - #[cfg(feature = "bitdepth_16")] - mod ipred_tmpl_16; - #[cfg(feature = "bitdepth_8")] - mod ipred_tmpl_8; mod itx; mod itx_1d; #[cfg(feature = "bitdepth_16")] diff --git a/src/decode.rs b/src/decode.rs index 82ee4d9e6..908107470 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -87,6 +87,7 @@ use crate::src::intra_edge::EdgeFlags; use crate::src::intra_edge::EdgeNode; use crate::src::intra_edge::EdgeTip; use crate::src::intra_edge::EDGE_I444_TOP_HAS_RIGHT; +use crate::src::ipred::rav1d_intra_pred_dsp_init; use crate::src::levels::mv; use crate::src::levels::Av1Block; use crate::src::levels::BS_128x128; @@ -244,14 +245,13 @@ use std::sync::atomic::Ordering; #[cfg(feature = "bitdepth_8")] use crate::{ include::common::bitdepth::BitDepth8, src::cdef_tmpl_8::rav1d_cdef_dsp_init_8bpc, - src::ipred_tmpl_8::rav1d_intra_pred_dsp_init_8bpc, src::itx_tmpl_8::rav1d_itx_dsp_init_8bpc, + src::itx_tmpl_8::rav1d_itx_dsp_init_8bpc, src::loopfilter_tmpl_8::rav1d_loop_filter_dsp_init_8bpc, }; 
#[cfg(feature = "bitdepth_16")] use crate::{ include::common::bitdepth::BitDepth16, src::cdef_tmpl_16::rav1d_cdef_dsp_init_16bpc, - src::ipred_tmpl_16::rav1d_intra_pred_dsp_init_16bpc, src::itx_tmpl_16::rav1d_itx_dsp_init_16bpc, src::loopfilter_tmpl_16::rav1d_loop_filter_dsp_init_16bpc, }; @@ -5051,7 +5051,7 @@ pub unsafe fn rav1d_submit_frame(c: &mut Rav1dContext) -> Rav1dResult { #[cfg(feature = "bitdepth_8")] 8 => { rav1d_cdef_dsp_init_8bpc(&mut dsp.cdef); - rav1d_intra_pred_dsp_init_8bpc(&mut dsp.ipred); + rav1d_intra_pred_dsp_init::(&mut dsp.ipred); rav1d_itx_dsp_init_8bpc(&mut dsp.itx, bpc); rav1d_loop_filter_dsp_init_8bpc(&mut dsp.lf); rav1d_loop_restoration_dsp_init::(&mut dsp.lr, bpc); @@ -5061,7 +5061,7 @@ pub unsafe fn rav1d_submit_frame(c: &mut Rav1dContext) -> Rav1dResult { #[cfg(feature = "bitdepth_16")] 10 | 12 => { rav1d_cdef_dsp_init_16bpc(&mut dsp.cdef); - rav1d_intra_pred_dsp_init_16bpc(&mut dsp.ipred); + rav1d_intra_pred_dsp_init::(&mut dsp.ipred); rav1d_itx_dsp_init_16bpc(&mut dsp.itx, bpc); rav1d_loop_filter_dsp_init_16bpc(&mut dsp.lf); rav1d_loop_restoration_dsp_init::(&mut dsp.lr, bpc); diff --git a/src/ipred.rs b/src/ipred.rs index b38fc5f74..2b31f01e6 100644 --- a/src/ipred.rs +++ b/src/ipred.rs @@ -5,6 +5,21 @@ use crate::include::common::bitdepth::DynPixel; use crate::include::common::bitdepth::BPC; use crate::include::common::intops::apply_sign; use crate::include::common::intops::iclip; +use crate::include::dav1d::headers::Rav1dPixelLayout; +use crate::src::levels::DC_128_PRED; +use crate::src::levels::DC_PRED; +use crate::src::levels::FILTER_PRED; +use crate::src::levels::HOR_PRED; +use crate::src::levels::LEFT_DC_PRED; +use crate::src::levels::PAETH_PRED; +use crate::src::levels::SMOOTH_H_PRED; +use crate::src::levels::SMOOTH_PRED; +use crate::src::levels::SMOOTH_V_PRED; +use crate::src::levels::TOP_DC_PRED; +use crate::src::levels::VERT_PRED; +use crate::src::levels::Z1_PRED; +use crate::src::levels::Z2_PRED; +use 
crate::src::levels::Z3_PRED; use crate::src::tables::dav1d_dr_intra_derivative; use crate::src::tables::dav1d_filter_intra_taps; use crate::src::tables::dav1d_sm_weights; @@ -20,27 +35,7 @@ use std::ffi::c_void; use std::slice; #[cfg(feature = "asm")] -use crate::include::common::bitdepth::bd_fn; - -#[cfg(feature = "asm")] -use crate::{ - include::dav1d::headers::Rav1dPixelLayout, - src::cpu::{rav1d_get_cpu_flags, CpuFlags}, - src::levels::DC_128_PRED, - src::levels::DC_PRED, - src::levels::FILTER_PRED, - src::levels::HOR_PRED, - src::levels::LEFT_DC_PRED, - src::levels::PAETH_PRED, - src::levels::SMOOTH_H_PRED, - src::levels::SMOOTH_PRED, - src::levels::SMOOTH_V_PRED, - src::levels::TOP_DC_PRED, - src::levels::VERT_PRED, - src::levels::Z1_PRED, - src::levels::Z2_PRED, - src::levels::Z3_PRED, -}; +use crate::{include::common::bitdepth::bd_fn, src::cpu::rav1d_get_cpu_flags, src::cpu::CpuFlags}; #[cfg(all(feature = "asm", target_arch = "aarch64"))] use ::to_method::To; @@ -490,8 +485,7 @@ unsafe fn dc_gen_top(topleft: *const BD::Pixel, width: c_int) -> c return dc >> ctz(width as c_uint); } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn ipred_dc_top_c_erased( +unsafe extern "C" fn ipred_dc_top_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, topleft: *const DynPixel, @@ -512,8 +506,7 @@ pub(crate) unsafe extern "C" fn ipred_dc_top_c_erased( ); } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn ipred_cfl_top_c_erased( +unsafe extern "C" fn ipred_cfl_top_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, topleft: *const DynPixel, @@ -545,8 +538,7 @@ unsafe fn dc_gen_left(topleft: *const BD::Pixel, height: c_int) -> return dc >> ctz(height as c_uint); } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn ipred_dc_left_c_erased( +unsafe extern "C" fn ipred_dc_left_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, topleft: *const DynPixel, @@ -567,8 
+559,7 @@ pub(crate) unsafe extern "C" fn ipred_dc_left_c_erased( ); } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn ipred_cfl_left_c_erased( +unsafe extern "C" fn ipred_cfl_left_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, topleft: *const DynPixel, @@ -620,8 +611,7 @@ unsafe fn dc_gen(topleft: *const BD::Pixel, width: c_int, height: return dc; } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn ipred_dc_c_erased( +unsafe extern "C" fn ipred_dc_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, topleft: *const DynPixel, @@ -642,8 +632,7 @@ pub(crate) unsafe extern "C" fn ipred_dc_c_erased( ); } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn ipred_cfl_c_erased( +unsafe extern "C" fn ipred_cfl_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, topleft: *const DynPixel, @@ -666,8 +655,7 @@ pub(crate) unsafe extern "C" fn ipred_cfl_c_erased( ); } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn ipred_dc_128_c_erased( +unsafe extern "C" fn ipred_dc_128_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, _topleft: *const DynPixel, @@ -683,8 +671,7 @@ pub(crate) unsafe extern "C" fn ipred_dc_128_c_erased( splat_dc(dst.cast(), stride, width, height, dc, bd); } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn ipred_cfl_128_c_erased( +unsafe extern "C" fn ipred_cfl_128_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, _topleft: *const DynPixel, @@ -724,8 +711,7 @@ unsafe fn ipred_v_rust( } } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn ipred_v_c_erased( +unsafe extern "C" fn ipred_v_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, topleft: *const DynPixel, @@ -774,8 +760,7 @@ unsafe fn ipred_h_rust( } } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn ipred_h_c_erased( 
+unsafe extern "C" fn ipred_h_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, topleft: *const DynPixel, @@ -836,8 +821,7 @@ unsafe fn ipred_paeth_rust( } } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn ipred_paeth_c_erased( +unsafe extern "C" fn ipred_paeth_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, tl_ptr: *const DynPixel, @@ -894,8 +878,7 @@ unsafe fn ipred_smooth_rust( } } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn ipred_smooth_c_erased( +unsafe extern "C" fn ipred_smooth_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, topleft: *const DynPixel, @@ -947,8 +930,7 @@ unsafe fn ipred_smooth_v_rust( } } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn ipred_smooth_v_c_erased( +unsafe extern "C" fn ipred_smooth_v_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, topleft: *const DynPixel, @@ -1000,8 +982,7 @@ unsafe fn ipred_smooth_h_rust( } } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn ipred_smooth_h_c_erased( +unsafe extern "C" fn ipred_smooth_h_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, topleft: *const DynPixel, @@ -1480,8 +1461,7 @@ unsafe fn ipred_z3_rust( } } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn ipred_z1_c_erased( +unsafe extern "C" fn ipred_z1_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, topleft_in: *const DynPixel, @@ -1505,8 +1485,7 @@ pub(crate) unsafe extern "C" fn ipred_z1_c_erased( ); } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn ipred_z2_c_erased( +unsafe extern "C" fn ipred_z2_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, topleft_in: *const DynPixel, @@ -1530,8 +1509,7 @@ pub(crate) unsafe extern "C" fn ipred_z2_c_erased( ); } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn ipred_z3_c_erased( +unsafe extern 
"C" fn ipred_z3_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, topleft_in: *const DynPixel, @@ -1650,8 +1628,7 @@ unsafe fn ipred_filter_rust( } } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn ipred_filter_c_erased( +unsafe extern "C" fn ipred_filter_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, topleft_in: *const DynPixel, @@ -1765,8 +1742,7 @@ unsafe fn cfl_ac_rust( } } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn cfl_ac_420_c_erased( +unsafe extern "C" fn cfl_ac_420_c_erased( ac: *mut i16, ypx: *const DynPixel, stride: ptrdiff_t, @@ -1788,8 +1764,7 @@ pub(crate) unsafe extern "C" fn cfl_ac_420_c_erased( ); } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn cfl_ac_422_c_erased( +unsafe extern "C" fn cfl_ac_422_c_erased( ac: *mut i16, ypx: *const DynPixel, stride: ptrdiff_t, @@ -1811,8 +1786,7 @@ pub(crate) unsafe extern "C" fn cfl_ac_422_c_erased( ); } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn cfl_ac_444_c_erased( +unsafe extern "C" fn cfl_ac_444_c_erased( ac: *mut i16, ypx: *const DynPixel, stride: ptrdiff_t, @@ -1856,8 +1830,7 @@ unsafe fn pal_pred_rust( } } -// TODO(kkysen) Temporarily pub until mod is deduplicated -pub(crate) unsafe extern "C" fn pal_pred_c_erased( +unsafe extern "C" fn pal_pred_c_erased( dst: *mut DynPixel, stride: ptrdiff_t, pal: *const u16, @@ -2341,10 +2314,9 @@ unsafe extern "C" fn ipred_z3_neon_erased( ); } -// TODO(kkysen) Temporarily pub until mod is deduplicated #[cfg(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64"),))] #[inline(always)] -pub(crate) unsafe fn intra_pred_dsp_init_x86(c: *mut Rav1dIntraPredDSPContext) { +unsafe fn intra_pred_dsp_init_x86(c: *mut Rav1dIntraPredDSPContext) { let flags = rav1d_get_cpu_flags(); if !flags.contains(CpuFlags::SSSE3) { @@ -2432,10 +2404,9 @@ pub(crate) unsafe fn 
intra_pred_dsp_init_x86(c: *mut Rav1dIntraPre } } -// TODO(kkysen) Temporarily pub until mod is deduplicated #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64"),))] #[inline(always)] -pub(crate) unsafe fn intra_pred_dsp_init_arm(c: *mut Rav1dIntraPredDSPContext) { +unsafe fn intra_pred_dsp_init_arm(c: *mut Rav1dIntraPredDSPContext) { let flags = rav1d_get_cpu_flags(); if !flags.contains(CpuFlags::NEON) { @@ -2471,3 +2442,45 @@ pub(crate) unsafe fn intra_pred_dsp_init_arm(c: *mut Rav1dIntraPre (*c).pal_pred = bd_fn!(BD, pal_pred, neon); } + +#[cold] +pub unsafe fn rav1d_intra_pred_dsp_init(c: *mut Rav1dIntraPredDSPContext) { + (*c).intra_pred[DC_PRED as usize] = Some(ipred_dc_c_erased::); + (*c).intra_pred[DC_128_PRED as usize] = Some(ipred_dc_128_c_erased::); + (*c).intra_pred[TOP_DC_PRED as usize] = Some(ipred_dc_top_c_erased::); + (*c).intra_pred[LEFT_DC_PRED as usize] = Some(ipred_dc_left_c_erased::); + (*c).intra_pred[HOR_PRED as usize] = Some(ipred_h_c_erased::); + (*c).intra_pred[VERT_PRED as usize] = Some(ipred_v_c_erased::); + (*c).intra_pred[PAETH_PRED as usize] = Some(ipred_paeth_c_erased::); + (*c).intra_pred[SMOOTH_PRED as usize] = Some(ipred_smooth_c_erased::); + (*c).intra_pred[SMOOTH_V_PRED as usize] = Some(ipred_smooth_v_c_erased::); + (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(ipred_smooth_h_c_erased::); + (*c).intra_pred[Z1_PRED as usize] = Some(ipred_z1_c_erased::); + (*c).intra_pred[Z2_PRED as usize] = Some(ipred_z2_c_erased::); + (*c).intra_pred[Z3_PRED as usize] = Some(ipred_z3_c_erased::); + (*c).intra_pred[FILTER_PRED as usize] = Some(ipred_filter_c_erased::); + + (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = cfl_ac_420_c_erased::; + (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = cfl_ac_422_c_erased::; + (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = cfl_ac_444_c_erased::; + (*c).cfl_pred[DC_PRED as usize] = ipred_cfl_c_erased::; + + (*c).cfl_pred[DC_128_PRED as usize] = ipred_cfl_128_c_erased::; + 
(*c).cfl_pred[TOP_DC_PRED as usize] = ipred_cfl_top_c_erased::; + (*c).cfl_pred[LEFT_DC_PRED as usize] = ipred_cfl_left_c_erased::; + + (*c).pal_pred = pal_pred_c_erased::; + + #[cfg(feature = "asm")] + cfg_if! { + if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + use crate::src::ipred::intra_pred_dsp_init_x86; + + intra_pred_dsp_init_x86::(c); + } else if #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] { + use crate::src::ipred::intra_pred_dsp_init_arm; + + intra_pred_dsp_init_arm::(c); + } + } +} diff --git a/src/ipred_tmpl_16.rs b/src/ipred_tmpl_16.rs deleted file mode 100644 index a8e58d7d3..000000000 --- a/src/ipred_tmpl_16.rs +++ /dev/null @@ -1,84 +0,0 @@ -use crate::include::common::bitdepth::BitDepth16; -use crate::include::dav1d::headers::Rav1dPixelLayout; -use crate::src::ipred::cfl_ac_420_c_erased; -use crate::src::ipred::cfl_ac_422_c_erased; -use crate::src::ipred::cfl_ac_444_c_erased; -use crate::src::ipred::ipred_cfl_128_c_erased; -use crate::src::ipred::ipred_cfl_c_erased; -use crate::src::ipred::ipred_cfl_left_c_erased; -use crate::src::ipred::ipred_cfl_top_c_erased; -use crate::src::ipred::ipred_dc_128_c_erased; -use crate::src::ipred::ipred_dc_c_erased; -use crate::src::ipred::ipred_dc_left_c_erased; -use crate::src::ipred::ipred_dc_top_c_erased; -use crate::src::ipred::ipred_filter_c_erased; -use crate::src::ipred::ipred_h_c_erased; -use crate::src::ipred::ipred_paeth_c_erased; -use crate::src::ipred::ipred_smooth_c_erased; -use crate::src::ipred::ipred_smooth_h_c_erased; -use crate::src::ipred::ipred_smooth_v_c_erased; -use crate::src::ipred::ipred_v_c_erased; -use crate::src::ipred::ipred_z1_c_erased; -use crate::src::ipred::ipred_z2_c_erased; -use crate::src::ipred::ipred_z3_c_erased; -use crate::src::ipred::pal_pred_c_erased; -use crate::src::ipred::Rav1dIntraPredDSPContext; -use crate::src::levels::DC_128_PRED; -use crate::src::levels::DC_PRED; -use crate::src::levels::FILTER_PRED; -use crate::src::levels::HOR_PRED; 
-use crate::src::levels::LEFT_DC_PRED; -use crate::src::levels::PAETH_PRED; -use crate::src::levels::SMOOTH_H_PRED; -use crate::src::levels::SMOOTH_PRED; -use crate::src::levels::SMOOTH_V_PRED; -use crate::src::levels::TOP_DC_PRED; -use crate::src::levels::VERT_PRED; -use crate::src::levels::Z1_PRED; -use crate::src::levels::Z2_PRED; -use crate::src::levels::Z3_PRED; - -#[cfg(feature = "asm")] -use cfg_if::cfg_if; - -#[cold] -pub unsafe fn rav1d_intra_pred_dsp_init_16bpc(c: *mut Rav1dIntraPredDSPContext) { - (*c).intra_pred[DC_PRED as usize] = Some(ipred_dc_c_erased::); - (*c).intra_pred[DC_128_PRED as usize] = Some(ipred_dc_128_c_erased::); - (*c).intra_pred[TOP_DC_PRED as usize] = Some(ipred_dc_top_c_erased::); - (*c).intra_pred[LEFT_DC_PRED as usize] = Some(ipred_dc_left_c_erased::); - (*c).intra_pred[HOR_PRED as usize] = Some(ipred_h_c_erased::); - (*c).intra_pred[VERT_PRED as usize] = Some(ipred_v_c_erased::); - (*c).intra_pred[PAETH_PRED as usize] = Some(ipred_paeth_c_erased::); - (*c).intra_pred[SMOOTH_PRED as usize] = Some(ipred_smooth_c_erased::); - (*c).intra_pred[SMOOTH_V_PRED as usize] = Some(ipred_smooth_v_c_erased::); - (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(ipred_smooth_h_c_erased::); - (*c).intra_pred[Z1_PRED as usize] = Some(ipred_z1_c_erased::); - (*c).intra_pred[Z2_PRED as usize] = Some(ipred_z2_c_erased::); - (*c).intra_pred[Z3_PRED as usize] = Some(ipred_z3_c_erased::); - (*c).intra_pred[FILTER_PRED as usize] = Some(ipred_filter_c_erased::); - - (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = cfl_ac_420_c_erased::; - (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = cfl_ac_422_c_erased::; - (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = cfl_ac_444_c_erased::; - (*c).cfl_pred[DC_PRED as usize] = ipred_cfl_c_erased::; - - (*c).cfl_pred[DC_128_PRED as usize] = ipred_cfl_128_c_erased::; - (*c).cfl_pred[TOP_DC_PRED as usize] = ipred_cfl_top_c_erased::; - (*c).cfl_pred[LEFT_DC_PRED as usize] = ipred_cfl_left_c_erased::; - - 
(*c).pal_pred = pal_pred_c_erased::; - - #[cfg(feature = "asm")] - cfg_if! { - if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - use crate::src::ipred::intra_pred_dsp_init_x86; - - intra_pred_dsp_init_x86::(c); - } else if #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] { - use crate::src::ipred::intra_pred_dsp_init_arm; - - intra_pred_dsp_init_arm::(c); - } - } -} diff --git a/src/ipred_tmpl_8.rs b/src/ipred_tmpl_8.rs deleted file mode 100644 index d70090e58..000000000 --- a/src/ipred_tmpl_8.rs +++ /dev/null @@ -1,83 +0,0 @@ -use crate::include::common::bitdepth::BitDepth8; -use crate::include::dav1d::headers::Rav1dPixelLayout; -use crate::src::ipred::cfl_ac_420_c_erased; -use crate::src::ipred::cfl_ac_422_c_erased; -use crate::src::ipred::cfl_ac_444_c_erased; -use crate::src::ipred::ipred_cfl_128_c_erased; -use crate::src::ipred::ipred_cfl_c_erased; -use crate::src::ipred::ipred_cfl_left_c_erased; -use crate::src::ipred::ipred_cfl_top_c_erased; -use crate::src::ipred::ipred_dc_128_c_erased; -use crate::src::ipred::ipred_dc_c_erased; -use crate::src::ipred::ipred_dc_left_c_erased; -use crate::src::ipred::ipred_dc_top_c_erased; -use crate::src::ipred::ipred_filter_c_erased; -use crate::src::ipred::ipred_h_c_erased; -use crate::src::ipred::ipred_paeth_c_erased; -use crate::src::ipred::ipred_smooth_c_erased; -use crate::src::ipred::ipred_smooth_h_c_erased; -use crate::src::ipred::ipred_smooth_v_c_erased; -use crate::src::ipred::ipred_v_c_erased; -use crate::src::ipred::ipred_z1_c_erased; -use crate::src::ipred::ipred_z2_c_erased; -use crate::src::ipred::ipred_z3_c_erased; -use crate::src::ipred::pal_pred_c_erased; -use crate::src::ipred::Rav1dIntraPredDSPContext; -use crate::src::levels::DC_128_PRED; -use crate::src::levels::DC_PRED; -use crate::src::levels::FILTER_PRED; -use crate::src::levels::HOR_PRED; -use crate::src::levels::LEFT_DC_PRED; -use crate::src::levels::PAETH_PRED; -use crate::src::levels::SMOOTH_H_PRED; -use 
crate::src::levels::SMOOTH_PRED; -use crate::src::levels::SMOOTH_V_PRED; -use crate::src::levels::TOP_DC_PRED; -use crate::src::levels::VERT_PRED; -use crate::src::levels::Z1_PRED; -use crate::src::levels::Z2_PRED; -use crate::src::levels::Z3_PRED; - -#[cfg(feature = "asm")] -use cfg_if::cfg_if; - -#[cold] -pub unsafe fn rav1d_intra_pred_dsp_init_8bpc(c: *mut Rav1dIntraPredDSPContext) { - (*c).intra_pred[DC_PRED as usize] = Some(ipred_dc_c_erased::); - (*c).intra_pred[DC_128_PRED as usize] = Some(ipred_dc_128_c_erased::); - (*c).intra_pred[TOP_DC_PRED as usize] = Some(ipred_dc_top_c_erased::); - (*c).intra_pred[LEFT_DC_PRED as usize] = Some(ipred_dc_left_c_erased::); - (*c).intra_pred[HOR_PRED as usize] = Some(ipred_h_c_erased::); - (*c).intra_pred[VERT_PRED as usize] = Some(ipred_v_c_erased::); - (*c).intra_pred[PAETH_PRED as usize] = Some(ipred_paeth_c_erased::); - (*c).intra_pred[SMOOTH_PRED as usize] = Some(ipred_smooth_c_erased::); - (*c).intra_pred[SMOOTH_V_PRED as usize] = Some(ipred_smooth_v_c_erased::); - (*c).intra_pred[SMOOTH_H_PRED as usize] = Some(ipred_smooth_h_c_erased::); - (*c).intra_pred[Z1_PRED as usize] = Some(ipred_z1_c_erased::); - (*c).intra_pred[Z2_PRED as usize] = Some(ipred_z2_c_erased::); - (*c).intra_pred[Z3_PRED as usize] = Some(ipred_z3_c_erased::); - (*c).intra_pred[FILTER_PRED as usize] = Some(ipred_filter_c_erased::); - - (*c).cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = cfl_ac_420_c_erased::; - (*c).cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = cfl_ac_422_c_erased::; - (*c).cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = cfl_ac_444_c_erased::; - (*c).cfl_pred[DC_PRED as usize] = ipred_cfl_c_erased::; - (*c).cfl_pred[DC_128_PRED as usize] = ipred_cfl_128_c_erased::; - (*c).cfl_pred[TOP_DC_PRED as usize] = ipred_cfl_top_c_erased::; - (*c).cfl_pred[LEFT_DC_PRED as usize] = ipred_cfl_left_c_erased::; - - (*c).pal_pred = pal_pred_c_erased::; - - #[cfg(feature = "asm")] - cfg_if! 
{ - if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - use crate::src::ipred::intra_pred_dsp_init_x86; - - intra_pred_dsp_init_x86::(c); - } else if #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] { - use crate::src::ipred::intra_pred_dsp_init_arm; - - intra_pred_dsp_init_arm::(c); - } - } -}