From ed1dd603154f2fa8ad53e6bc76b779b4f204d111 Mon Sep 17 00:00:00 2001 From: Nicole LeGare Date: Thu, 20 Jun 2024 15:20:22 -0700 Subject: [PATCH 01/17] `rav1d_cdef_brow`: Remove mutable `offset` var --- src/cdef_apply.rs | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/cdef_apply.rs b/src/cdef_apply.rs index 747e59a12..37140a3c2 100644 --- a/src/cdef_apply.rs +++ b/src/cdef_apply.rs @@ -263,20 +263,19 @@ pub(crate) unsafe fn rav1d_cdef_brow( let mut top = 0 as *const BD::Pixel; let mut bot = 0 as *const BD::Pixel; - let mut offset: ptrdiff_t; let st_y: bool; if !have_tt { st_y = true; } else if sbrow_start && by == by_start { if resize { - offset = ((sby - 1) * 4) as isize * y_stride + (bx * 4) as isize; + let offset = ((sby - 1) * 4) as isize * y_stride + (bx * 4) as isize; top = &*f .lf .cdef_line_buf .element_as((f.lf.cdef_lpf_line[0] as isize + offset) as usize); } else { - offset = (sby * ((4 as c_int) << sb128) - 4) as isize * y_stride + let offset = (sby * ((4 as c_int) << sb128) - 4) as isize * y_stride + (bx * 4) as isize; top = &*f .lf @@ -287,12 +286,12 @@ pub(crate) unsafe fn rav1d_cdef_brow( bot = bottom.as_ptr::(); st_y = false; } else if !sbrow_start && by + 2 >= by_end { - offset = (sby * 4) as isize * y_stride + (bx * 4) as isize; + let offset = (sby * 4) as isize * y_stride + (bx * 4) as isize; top = &*f.lf.cdef_line_buf.element_as( (f.lf.cdef_line[tf as usize][0] as isize + offset) as usize, ); if resize { - offset = (sby * 4 + 2) as isize * y_stride + (bx * 4) as isize; + let offset = (sby * 4 + 2) as isize * y_stride + (bx * 4) as isize; // FIXME incorrect; should be kept as an offset for later slices. bot = &*f .lf @@ -300,7 +299,7 @@ pub(crate) unsafe fn rav1d_cdef_brow( .element_as((f.lf.cdef_lpf_line[0] as isize + offset) as usize); } else { let line = sby * ((4 as c_int) << sb128) + 4 * sb128 as c_int + 2; - offset = line as isize * y_stride + (bx * 4) as isize; + let offset = line as isize * y_stride + (bx * 4) as isize; // FIXME incorrect; should be kept as an offset for later slices. bot = &*f .lf @@ -313,7 +312,7 @@ pub(crate) unsafe fn rav1d_cdef_brow( } if st_y { - offset = have_tt as isize * (sby * 4) as isize * y_stride + let offset = have_tt as isize * (sby * 4) as isize * y_stride + (bx * 4) as isize; top = &*f.lf.cdef_line_buf.element_as( (f.lf.cdef_line[tf as usize][0] as isize + offset) as usize, @@ -367,14 +366,14 @@ pub(crate) unsafe fn rav1d_cdef_brow( st_uv = true; } else if sbrow_start && by == by_start { if resize { - offset = ((sby - 1) * 4) as isize * uv_stride + let offset = ((sby - 1) * 4) as isize * uv_stride + (bx * 4 >> ss_hor) as isize; top = &*f.lf.cdef_line_buf.element_as( (f.lf.cdef_lpf_line[pl] as isize + offset) as usize, ); } else { let line_0 = sby * ((4 as c_int) << sb128) - 4; - offset = line_0 as isize * uv_stride + let offset = line_0 as isize * uv_stride + (bx * 4 >> ss_hor) as isize; top = &*f.lf.lr_line_buf.element_as( (f.lf.lr_lpf_line[pl] as isize + offset) as usize, @@ -391,7 +390,7 @@ pub(crate) unsafe fn rav1d_cdef_brow( as usize, ); if resize { - offset = (sby * 4 + 2) as isize * uv_stride + let offset = (sby * 4 + 2) as isize * uv_stride + (bx * 4 >> ss_hor) as isize; // FIXME incorrect; should be kept as an offset for later slices. bot = &*f.lf.cdef_line_buf.element_as( @@ -400,7 +399,7 @@ pub(crate) unsafe fn rav1d_cdef_brow( } else { let line = sby * ((4 as c_int) << sb128) + 4 * sb128 as c_int + 2; - offset = + let offset = line as isize * uv_stride + (bx * 4 >> ss_hor) as isize; // FIXME incorrect; should be kept as an offset for later slices. bot = &*f.lf.lr_line_buf.element_as( From cc554951a427ee9ef0efa04122b1a114833bfa7d Mon Sep 17 00:00:00 2001 From: Nicole LeGare Date: Thu, 20 Jun 2024 16:30:33 -0700 Subject: [PATCH 02/17] `cdef::Fn::call`: Make `top` arg a safe ref + offset --- src/cdef.rs | 7 +++- src/cdef_apply.rs | 94 +++++++++++++++++++++++++++-------------------- 2 files changed, 59 insertions(+), 42 deletions(-) diff --git a/src/cdef.rs b/src/cdef.rs index 6ba2acca8..92e8d3de4 100644 --- a/src/cdef.rs +++ b/src/cdef.rs @@ -5,7 +5,10 @@ use crate::include::common::bitdepth::LeftPixelRow2px; use crate::include::common::intops::apply_sign; use crate::include::common::intops::iclip; use crate::include::dav1d::picture::Rav1dPictureDataComponentOffset; +use crate::src::align::Align64; +use crate::src::align::AlignedVec; use crate::src::cpu::CpuFlags; +use crate::src::disjoint_mut::DisjointMut; use crate::src::ffi_safe::FFISafe; use crate::src::strided::Strided as _; use crate::src::tables::dav1d_cdef_directions; @@ -63,7 +66,7 @@ impl cdef::Fn { &self, dst: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - top: *const BD::Pixel, + (top, top_off): (&DisjointMut>>, usize), bottom: *const BD::Pixel, pri_strength: c_int, sec_strength: u8, @@ -75,7 +78,7 @@ impl cdef::Fn { let dst_ptr = dst.as_mut_ptr::().cast(); let stride = dst.stride(); let left = ptr::from_ref(left).cast(); - let top = top.cast(); + let top = (&*top.element_as(top_off) as *const BD::Pixel).cast(); let bottom = bottom.cast(); let sec_strength = sec_strength as c_int; let damping = damping as c_int; diff --git a/src/cdef_apply.rs b/src/cdef_apply.rs index 37140a3c2..db5ae2510 100644 --- a/src/cdef_apply.rs +++ b/src/cdef_apply.rs @@ -261,7 +261,8 @@ pub(crate) unsafe fn rav1d_cdef_brow( 0 }; - let mut top = 0 as *const BD::Pixel; + // TODO: Remove `st_y` and put both `top` and `bot` in an `Option`. + let mut top = None; let mut bot = 0 as *const BD::Pixel; let st_y: bool; @@ -269,27 +270,31 @@ pub(crate) unsafe fn rav1d_cdef_brow( st_y = true; } else if sbrow_start && by == by_start { if resize { - let offset = ((sby - 1) * 4) as isize * y_stride + (bx * 4) as isize; - top = &*f - .lf - .cdef_line_buf - .element_as((f.lf.cdef_lpf_line[0] as isize + offset) as usize); + top = Some(( + &f.lf.cdef_line_buf, + f.lf.cdef_lpf_line[0].wrapping_add_signed( + ((sby - 1) * 4) as isize * y_stride + (bx * 4) as isize, + ), + )); } else { - let offset = (sby * ((4 as c_int) << sb128) - 4) as isize * y_stride - + (bx * 4) as isize; - top = &*f - .lf - .lr_line_buf - .element_as((f.lf.lr_lpf_line[0] as isize + offset) as usize); + top = Some(( + &f.lf.lr_line_buf, + f.lf.lr_lpf_line[0].wrapping_add_signed( + (sby * ((4 as c_int) << sb128) - 4) as isize * y_stride + + (bx * 4) as isize, + ), + )); } let bottom = bptrs[0] + (8 * y_stride); bot = bottom.as_ptr::(); st_y = false; } else if !sbrow_start && by + 2 >= by_end { - let offset = (sby * 4) as isize * y_stride + (bx * 4) as isize; - top = &*f.lf.cdef_line_buf.element_as( - (f.lf.cdef_line[tf as usize][0] as isize + offset) as usize, - ); + top = Some(( + &f.lf.cdef_line_buf, + f.lf.cdef_line[tf as usize][0].wrapping_add_signed( + (sby * 4) as isize * y_stride + (bx * 4) as isize, + ), + )); if resize { let offset = (sby * 4 + 2) as isize * y_stride + (bx * 4) as isize; // FIXME incorrect; should be kept as an offset for later slices. @@ -312,14 +317,17 @@ pub(crate) unsafe fn rav1d_cdef_brow( } if st_y { - let offset = have_tt as isize * (sby * 4) as isize * y_stride - + (bx * 4) as isize; - top = &*f.lf.cdef_line_buf.element_as( - (f.lf.cdef_line[tf as usize][0] as isize + offset) as usize, - ); + top = Some(( + &f.lf.cdef_line_buf, + f.lf.cdef_line[tf as usize][0].wrapping_add_signed( + have_tt as isize * (sby * 4) as isize * y_stride + + (bx * 4) as isize, + ), + )); let bottom = bptrs[0] + (8 * y_stride); bot = bottom.as_ptr::(); } + let mut top = top.unwrap(); if y_pri_lvl != 0 { let adj_y_pri_lvl = adjust_strength(y_pri_lvl, variance); @@ -366,28 +374,33 @@ pub(crate) unsafe fn rav1d_cdef_brow( st_uv = true; } else if sbrow_start && by == by_start { if resize { - let offset = ((sby - 1) * 4) as isize * uv_stride - + (bx * 4 >> ss_hor) as isize; - top = &*f.lf.cdef_line_buf.element_as( - (f.lf.cdef_lpf_line[pl] as isize + offset) as usize, + top = ( + &f.lf.cdef_line_buf, + f.lf.cdef_lpf_line[pl].wrapping_add_signed( + ((sby - 1) * 4) as isize * uv_stride + + (bx * 4 >> ss_hor) as isize, + ), ); } else { - let line_0 = sby * ((4 as c_int) << sb128) - 4; - let offset = line_0 as isize * uv_stride - + (bx * 4 >> ss_hor) as isize; - top = &*f.lf.lr_line_buf.element_as( - (f.lf.lr_lpf_line[pl] as isize + offset) as usize, + let line = sby * ((4 as c_int) << sb128) - 4; + top = ( + &f.lf.lr_line_buf, + f.lf.lr_lpf_line[pl].wrapping_add_signed( + line as isize * uv_stride + + (bx * 4 >> ss_hor) as isize, + ), ); } let bottom = bptrs[pl] + ((8 >> ss_ver) * uv_stride); bot = bottom.as_ptr::(); st_uv = false; } else if !sbrow_start && by + 2 >= by_end { - let top_offset: ptrdiff_t = (sby * 8) as isize * uv_stride - + (bx * 4 >> ss_hor) as isize; - top = &*f.lf.cdef_line_buf.element_as( - (f.lf.cdef_line[tf as usize][pl] as isize + top_offset) - as usize, + top = ( + &f.lf.cdef_line_buf, + f.lf.cdef_line[tf as usize][pl].wrapping_add_signed( + (sby * 8) as isize * uv_stride + + (bx * 4 >> ss_hor) as isize, + ), ); if resize { let offset = (sby * 4 + 2) as isize * uv_stride @@ -412,11 +425,12 @@ pub(crate) unsafe fn rav1d_cdef_brow( } if st_uv { - let offset = have_tt as isize * (sby * 8) as isize * uv_stride - + (bx * 4 >> ss_hor) as isize; - top = &*f.lf.cdef_line_buf.element_as( - (f.lf.cdef_line[tf as usize][pl] as isize + offset) - as usize, + top = ( + &f.lf.cdef_line_buf, + f.lf.cdef_line[tf as usize][pl].wrapping_add_signed( + have_tt as isize * (sby * 8) as isize * uv_stride + + (bx * 4 >> ss_hor) as isize, + ), ); let bottom = bptrs[pl] + ((8 >> ss_ver) * uv_stride); bot = bottom.as_ptr::(); From d98717d2fa5e94fa5d4b2edc350625dc588965fb Mon Sep 17 00:00:00 2001 From: Nicole LeGare Date: Thu, 20 Jun 2024 17:06:28 -0700 Subject: [PATCH 03/17] `cdef::Fn::call`: Make `bottom` arg safe --- src/cdef.rs | 14 ++++- src/cdef_apply.rs | 140 ++++++++++++++++++++++------------------------ 2 files changed, 79 insertions(+), 75 deletions(-) diff --git a/src/cdef.rs b/src/cdef.rs index 92e8d3de4..e3624afdf 100644 --- a/src/cdef.rs +++ b/src/cdef.rs @@ -56,6 +56,11 @@ wrap_fn_ptr!(pub unsafe extern "C" fn cdef( _dst: *const FFISafe, ) -> ()); +pub enum CdefBottom<'a> { + Pic(Rav1dPictureDataComponentOffset<'a>), + LineBuf((&'a DisjointMut>>, usize)), +} + impl cdef::Fn { /// CDEF operates entirely on pre-filter data. /// If bottom/right edges are present (according to `edges`), @@ -67,7 +72,7 @@ impl cdef::Fn { dst: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], (top, top_off): (&DisjointMut>>, usize), - bottom: *const BD::Pixel, + bottom: CdefBottom<'_>, pri_strength: c_int, sec_strength: u8, dir: c_int, @@ -79,7 +84,12 @@ impl cdef::Fn { let stride = dst.stride(); let left = ptr::from_ref(left).cast(); let top = (&*top.element_as(top_off) as *const BD::Pixel).cast(); - let bottom = bottom.cast(); + let bottom = match bottom { + CdefBottom::Pic(bot) => bot.as_ptr::().cast(), + CdefBottom::LineBuf((bot, bot_off)) => { + (&*bot.element_as(bot_off) as *const BD::Pixel).cast() + } + }; let sec_strength = sec_strength as c_int; let damping = damping as c_int; let bd = bd.into_c(); diff --git a/src/cdef_apply.rs b/src/cdef_apply.rs index db5ae2510..5f0567c44 100644 --- a/src/cdef_apply.rs +++ b/src/cdef_apply.rs @@ -5,6 +5,7 @@ use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::include::dav1d::picture::Rav1dPictureDataComponentOffset; use crate::src::align::Align16; use crate::src::align::AlignedVec64; +use crate::src::cdef::CdefBottom; use crate::src::cdef::CdefEdgeFlags; use crate::src::disjoint_mut::DisjointMut; use crate::src::internal::Rav1dContext; @@ -261,73 +262,66 @@ pub(crate) unsafe fn rav1d_cdef_brow( 0 }; - // TODO: Remove `st_y` and put both `top` and `bot` in an `Option`. - let mut top = None; - let mut bot = 0 as *const BD::Pixel; - let st_y: bool; - - if !have_tt { - st_y = true; + let top_bot = if !have_tt { + None } else if sbrow_start && by == by_start { - if resize { - top = Some(( + let top = if resize { + ( &f.lf.cdef_line_buf, f.lf.cdef_lpf_line[0].wrapping_add_signed( ((sby - 1) * 4) as isize * y_stride + (bx * 4) as isize, ), - )); + ) } else { - top = Some(( + ( &f.lf.lr_line_buf, f.lf.lr_lpf_line[0].wrapping_add_signed( (sby * ((4 as c_int) << sb128) - 4) as isize * y_stride + (bx * 4) as isize, ), - )); - } + ) + }; let bottom = bptrs[0] + (8 * y_stride); - bot = bottom.as_ptr::(); - st_y = false; + Some((top, CdefBottom::Pic(bottom))) } else if !sbrow_start && by + 2 >= by_end { - top = Some(( + let top = ( &f.lf.cdef_line_buf, f.lf.cdef_line[tf as usize][0].wrapping_add_signed( (sby * 4) as isize * y_stride + (bx * 4) as isize, ), - )); - if resize { - let offset = (sby * 4 + 2) as isize * y_stride + (bx * 4) as isize; - // FIXME incorrect; should be kept as an offset for later slices. - bot = &*f - .lf - .cdef_line_buf - .element_as((f.lf.cdef_lpf_line[0] as isize + offset) as usize); + ); + let bottom = if resize { + ( + &f.lf.cdef_line_buf, + f.lf.cdef_lpf_line[0].wrapping_add_signed( + (sby * 4 + 2) as isize * y_stride + (bx * 4) as isize, + ), + ) } else { let line = sby * ((4 as c_int) << sb128) + 4 * sb128 as c_int + 2; - let offset = line as isize * y_stride + (bx * 4) as isize; - // FIXME incorrect; should be kept as an offset for later slices. - bot = &*f - .lf - .lr_line_buf - .element_as((f.lf.lr_lpf_line[0] as isize + offset) as usize); - } - st_y = false; + ( + &f.lf.lr_line_buf, + f.lf.lr_lpf_line[0].wrapping_add_signed( + line as isize * y_stride + (bx * 4) as isize, + ), + ) + }; + Some((top, CdefBottom::LineBuf(bottom))) } else { - st_y = true; - } + None + }; - if st_y { - top = Some(( + let (top, bot) = top_bot.unwrap_or_else(|| { + let top = ( &f.lf.cdef_line_buf, f.lf.cdef_line[tf as usize][0].wrapping_add_signed( have_tt as isize * (sby * 4) as isize * y_stride + (bx * 4) as isize, ), - )); + ); let bottom = bptrs[0] + (8 * y_stride); - bot = bottom.as_ptr::(); - } - let mut top = top.unwrap(); + (top, CdefBottom::Pic(bottom)) + }); if y_pri_lvl != 0 { let adj_y_pri_lvl = adjust_strength(y_pri_lvl, variance); @@ -369,63 +363,63 @@ pub(crate) unsafe fn rav1d_cdef_brow( 0 }; for pl in 1..=2 { - let st_uv: bool; - if !have_tt { - st_uv = true; + let top_bot = if !have_tt { + None } else if sbrow_start && by == by_start { - if resize { - top = ( + let top = if resize { + ( &f.lf.cdef_line_buf, f.lf.cdef_lpf_line[pl].wrapping_add_signed( ((sby - 1) * 4) as isize * uv_stride + (bx * 4 >> ss_hor) as isize, ), - ); + ) } else { let line = sby * ((4 as c_int) << sb128) - 4; - top = ( + ( &f.lf.lr_line_buf, f.lf.lr_lpf_line[pl].wrapping_add_signed( line as isize * uv_stride + (bx * 4 >> ss_hor) as isize, ), - ); - } + ) + }; let bottom = bptrs[pl] + ((8 >> ss_ver) * uv_stride); - bot = bottom.as_ptr::(); - st_uv = false; + Some((top, CdefBottom::Pic(bottom))) } else if !sbrow_start && by + 2 >= by_end { - top = ( + let top = ( &f.lf.cdef_line_buf, f.lf.cdef_line[tf as usize][pl].wrapping_add_signed( (sby * 8) as isize * uv_stride + (bx * 4 >> ss_hor) as isize, ), ); - if resize { - let offset = (sby * 4 + 2) as isize * uv_stride - + (bx * 4 >> ss_hor) as isize; - // FIXME incorrect; should be kept as an offset for later slices. - bot = &*f.lf.cdef_line_buf.element_as( - (f.lf.cdef_lpf_line[pl] as isize + offset) as usize, - ); + let bottom = if resize { + ( + &f.lf.cdef_line_buf, + f.lf.cdef_lpf_line[pl].wrapping_add_signed( + (sby * 4 + 2) as isize * uv_stride + + (bx * 4 >> ss_hor) as isize, + ), + ) } else { let line = sby * ((4 as c_int) << sb128) + 4 * sb128 as c_int + 2; - let offset = - line as isize * uv_stride + (bx * 4 >> ss_hor) as isize; - // FIXME incorrect; should be kept as an offset for later slices. - bot = &*f.lf.lr_line_buf.element_as( - (f.lf.lr_lpf_line[pl] as isize + offset) as usize, - ); - } - st_uv = false; + ( + &f.lf.lr_line_buf, + f.lf.lr_lpf_line[pl].wrapping_add_signed( + line as isize * uv_stride + + (bx * 4 >> ss_hor) as isize, + ), + ) + }; + Some((top, CdefBottom::LineBuf(bottom))) } else { - st_uv = true; - } + None + }; - if st_uv { - top = ( + let (top, bot) = top_bot.unwrap_or_else(|| { + let top = ( &f.lf.cdef_line_buf, f.lf.cdef_line[tf as usize][pl].wrapping_add_signed( have_tt as isize * (sby * 8) as isize * uv_stride @@ -433,8 +427,8 @@ pub(crate) unsafe fn rav1d_cdef_brow( ), ); let bottom = bptrs[pl] + ((8 >> ss_ver) * uv_stride); - bot = bottom.as_ptr::(); - } + (top, CdefBottom::Pic(bottom)) + }); f.dsp.cdef.fb[uv_idx as usize].call::( bptrs[pl], From 390e2d7a7c949952b6e1a7e4669823d78f07756d Mon Sep 17 00:00:00 2001 From: Nicole LeGare Date: Fri, 21 Jun 2024 13:57:38 -0700 Subject: [PATCH 04/17] `cdef_filter_block_rust`: Take safe args --- src/cdef.rs | 70 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 21 deletions(-) diff --git a/src/cdef.rs b/src/cdef.rs index e3624afdf..3c90e9bf3 100644 --- a/src/cdef.rs +++ b/src/cdef.rs @@ -19,7 +19,6 @@ use std::cmp; use std::ffi::c_int; use std::ffi::c_uint; use std::ptr; -use std::slice; #[cfg(all( feature = "asm", @@ -45,8 +44,8 @@ wrap_fn_ptr!(pub unsafe extern "C" fn cdef( dst_ptr: *mut DynPixel, stride: ptrdiff_t, left: *const [LeftPixelRow2px; 8], - top: *const DynPixel, - bottom: *const DynPixel, + top_ptr: *const DynPixel, + bottom_ptr: *const DynPixel, pri_strength: c_int, sec_strength: c_int, dir: c_int, @@ -54,8 +53,11 @@ wrap_fn_ptr!(pub unsafe extern "C" fn cdef( edges: CdefEdgeFlags, bitdepth_max: c_int, _dst: *const FFISafe, + _top: *const FFISafe>>>, + _bottom: *const FFISafe>, ) -> ()); +#[derive(Clone, Copy)] pub enum CdefBottom<'a> { Pic(Rav1dPictureDataComponentOffset<'a>), LineBuf((&'a DisjointMut>>, usize)), @@ -83,13 +85,15 @@ impl cdef::Fn { let dst_ptr = dst.as_mut_ptr::().cast(); let stride = dst.stride(); let left = ptr::from_ref(left).cast(); - let top = (&*top.element_as(top_off) as *const BD::Pixel).cast(); - let bottom = match bottom { + let top_ptr = (&*top.element_as(top_off) as *const BD::Pixel).cast(); + let bottom_ptr = match bottom { CdefBottom::Pic(bot) => bot.as_ptr::().cast(), CdefBottom::LineBuf((bot, bot_off)) => { (&*bot.element_as(bot_off) as *const BD::Pixel).cast() } }; + let top = FFISafe::new(top); + let bottom = FFISafe::new(&bottom); let sec_strength = sec_strength as c_int; let damping = damping as c_int; let bd = bd.into_c(); @@ -98,8 +102,8 @@ impl cdef::Fn { dst_ptr, stride, left, - top, - bottom, + top_ptr, + bottom_ptr, pri_strength, sec_strength, dir, @@ -107,6 +111,8 @@ impl cdef::Fn { edges, bd, dst, + top, + bottom, ) } } @@ -162,17 +168,21 @@ pub fn fill(tmp: &mut [i16], w: usize, h: usize) { } } -unsafe fn padding( +fn padding( tmp: &mut [i16; TMP_STRIDE * TMP_STRIDE], src: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - top: *const BD::Pixel, - bottom: *const BD::Pixel, + (top, top_off): (&DisjointMut>>, usize), + mut bottom: CdefBottom<'_>, w: usize, h: usize, edges: CdefEdgeFlags, ) { - let [top, bottom] = [top, bottom].map(|it| it.sub(2)); + let top_off = top_off - 2; + match &mut bottom { + CdefBottom::Pic(pic) => *pic -= 2usize, + CdefBottom::LineBuf((_, offset)) => *offset -= 2, + } let stride = src.pixel_stride::(); // Fill extended input buffer. @@ -198,7 +208,8 @@ unsafe fn padding( } for (i, y) in (y_start..2).enumerate() { - let top = slice::from_raw_parts(top.offset(i as isize * stride), x_end); + let offset = top_off.wrapping_add_signed(i as isize * stride); + let top = top.slice_as::<_, BD::Pixel>((offset.., ..x_end)); for x in x_start..x_end { tmp[x + y * TMP_STRIDE] = top[x].as_::(); } @@ -218,7 +229,14 @@ unsafe fn padding( } for (i, y) in (h + 2..y_end).enumerate() { let tmp = &mut tmp[y * TMP_STRIDE..]; - let bottom = slice::from_raw_parts(bottom.offset(i as isize * stride), x_end); + let offset = i as isize * stride; + let bottom = match bottom { + CdefBottom::Pic(pic) => &*(pic + offset).slice::(x_end), + CdefBottom::LineBuf((buf, off)) => { + let offset = off.wrapping_add_signed(offset); + &*buf.slice_as((offset.., ..x_end)) + } + }; for x in x_start..x_end { tmp[x] = bottom[x].as_::(); } @@ -226,11 +244,11 @@ unsafe fn padding( } #[inline(never)] -unsafe fn cdef_filter_block_rust( +fn cdef_filter_block_rust( dst: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - top: *const BD::Pixel, - bottom: *const BD::Pixel, + top: (&DisjointMut>>, usize), + bottom: CdefBottom<'_>, pri_strength: c_int, sec_strength: c_int, dir: c_int, @@ -364,8 +382,8 @@ unsafe extern "C" fn cdef_filter_block_c_erased; 8], - top: *const DynPixel, - bottom: *const DynPixel, + top_ptr: *const DynPixel, + _bottom_ptr: *const DynPixel, pri_strength: c_int, sec_strength: c_int, dir: c_int, @@ -373,18 +391,26 @@ unsafe extern "C" fn cdef_filter_block_c_erased, + top: *const FFISafe>>>, + bottom: *const FFISafe>, ) { // SAFETY: Was passed as `FFISafe::new(_)` in `cdef_dir::Fn::call`. let dst = *unsafe { FFISafe::get(dst) }; // SAFETY: Reverse of cast in `cdef::Fn::call`. let left = unsafe { &*left.cast() }; - let top = top.cast(); - let bottom = bottom.cast(); + // SAFETY: Was passed as `FFISafe::new(_)` in `cdef::Fn::call`. + let top = unsafe { FFISafe::get(top) }; + let top_base = top.as_mut_ptr().cast::().cast_const(); + // SAFETY: Reverse of what was done ine `cdef::Fn::call`. `top_ptr` is + // derived from `top` and so is safe to calculate the offset from. + let top_off = unsafe { top_ptr.cast::().offset_from(top_base) } as usize; + // SAFETY: Was passed as `FFISafe::new(_)` in `cdef::Fn::call`. + let bottom = *unsafe { FFISafe::get(bottom) }; let bd = BD::from_c(bitdepth_max); cdef_filter_block_rust( dst, left, - top, + (top, top_off), bottom, pri_strength, sec_strength, @@ -536,6 +562,8 @@ unsafe extern "C" fn cdef_filter_neon_erased< edges: CdefEdgeFlags, bitdepth_max: c_int, _dst: *const FFISafe, + _top: *const FFISafe>>>, + _bottom: *const FFISafe>, ) { use crate::src::align::Align16; From 6b9138600ddf69ccf3e09acba88797933e94c115 Mon Sep 17 00:00:00 2001 From: Nicole LeGare Date: Fri, 21 Jun 2024 14:00:14 -0700 Subject: [PATCH 05/17] `cdef::Fn::call`: Make safe along with callers --- src/cdef.rs | 37 ++++++++++++++++++++----------------- src/cdef_apply.rs | 2 +- src/internal.rs | 3 +-- src/recon.rs | 6 ++---- src/thread_task.rs | 5 +---- 5 files changed, 25 insertions(+), 28 deletions(-) diff --git a/src/cdef.rs b/src/cdef.rs index 3c90e9bf3..130cf2123 100644 --- a/src/cdef.rs +++ b/src/cdef.rs @@ -69,7 +69,7 @@ impl cdef::Fn { /// then the pre-filter data is located in `dst`. /// However, the edge pixels above `dst` may be post-filter, /// so in order to get access to pre-filter top pixels, use `top`. - pub unsafe fn call( + pub fn call( &self, dst: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], @@ -98,22 +98,25 @@ impl cdef::Fn { let damping = damping as c_int; let bd = bd.into_c(); let dst = FFISafe::new(&dst); - self.get()( - dst_ptr, - stride, - left, - top_ptr, - bottom_ptr, - pri_strength, - sec_strength, - dir, - damping, - edges, - bd, - dst, - top, - bottom, - ) + // SAFETY: Rust fallback is safe, asm is assumed to do the same. + unsafe { + self.get()( + dst_ptr, + stride, + left, + top_ptr, + bottom_ptr, + pri_strength, + sec_strength, + dir, + damping, + edges, + bd, + dst, + top, + bottom, + ) + } } } diff --git a/src/cdef_apply.rs b/src/cdef_apply.rs index 5f0567c44..8c4102625 100644 --- a/src/cdef_apply.rs +++ b/src/cdef_apply.rs @@ -126,7 +126,7 @@ fn adjust_strength(strength: u8, var: c_uint) -> c_int { strength as c_int * (4 + i) + 8 >> 4 } -pub(crate) unsafe fn rav1d_cdef_brow( +pub(crate) fn rav1d_cdef_brow( c: &Rav1dContext, tc: &mut Rav1dTaskContext, f: &Rav1dFrameData, diff --git a/src/internal.rs b/src/internal.rs index 595570659..fcbd7dd66 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -509,8 +509,7 @@ pub(crate) struct Rav1dFrameContext_bd_fn { pub filter_sbrow: filter_sbrow_fn, pub filter_sbrow_deblock_cols: filter_sbrow_fn, pub filter_sbrow_deblock_rows: filter_sbrow_fn, - pub filter_sbrow_cdef: - unsafe fn(&Rav1dContext, &Rav1dFrameData, &mut Rav1dTaskContext, c_int) -> (), + pub filter_sbrow_cdef: fn(&Rav1dContext, &Rav1dFrameData, &mut Rav1dTaskContext, c_int) -> (), pub filter_sbrow_resize: filter_sbrow_fn, pub filter_sbrow_lr: filter_sbrow_fn, pub backup_ipred_edge: backup_ipred_edge_fn, diff --git a/src/recon.rs b/src/recon.rs index f7c34e6de..f8c0c2707 100644 --- a/src/recon.rs +++ b/src/recon.rs @@ -3709,14 +3709,12 @@ pub(crate) fn rav1d_filter_sbrow_cdef( let ss_ver = f.cur.p.layout == Rav1dPixelLayout::I420 && i != 0; p[i] - ((8 * p[i].pixel_stride::()) >> ss_ver as u8) }); - // TODO make safe - unsafe { rav1d_cdef_brow::(c, tc, f, p_up, prev_mask, start - 2, start, true, sby) }; + rav1d_cdef_brow::(c, tc, f, p_up, prev_mask, start - 2, start, true, sby); } let n_blks = sbsz - 2 * ((sby + 1) < f.sbh) as c_int; let end = cmp::min(start + n_blks, f.bh); - // TODO make safe - unsafe { rav1d_cdef_brow::(c, tc, f, p, mask_offset, start, end, false, sby) }; + rav1d_cdef_brow::(c, tc, f, p, mask_offset, start, end, false, sby); } pub(crate) fn rav1d_filter_sbrow_resize( diff --git a/src/thread_task.rs b/src/thread_task.rs index 9bfac2cc4..a801ad671 100644 --- a/src/thread_task.rs +++ b/src/thread_task.rs @@ -1275,10 +1275,7 @@ pub fn rav1d_worker_task(task_thread: Arc) { let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); if seq_hdr.cdef != 0 { if fc.task_thread.error.load(Ordering::SeqCst) == 0 { - // SAFETY: TODO make safe - unsafe { - (f.bd_fn().filter_sbrow_cdef)(c, &f, &mut tc, sby); - } + (f.bd_fn().filter_sbrow_cdef)(c, &f, &mut tc, sby); } drop(f); reset_task_cur_async(ttd, t.frame_idx, c.fc.len() as u32); From 5eeac73187fcb7920eca96bfc8a101da50d68caf Mon Sep 17 00:00:00 2001 From: Nicole LeGare Date: Fri, 21 Jun 2024 14:09:12 -0700 Subject: [PATCH 06/17] `cdef_filter_block_c_erased`: Deny unsafe ops --- src/cdef.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cdef.rs b/src/cdef.rs index 130cf2123..6a1e47620 100644 --- a/src/cdef.rs +++ b/src/cdef.rs @@ -381,6 +381,7 @@ fn cdef_filter_block_rust( /// # Safety /// /// Must be called by [`cdef::Fn::call`]. +#[deny(unsafe_op_in_unsafe_fn)] unsafe extern "C" fn cdef_filter_block_c_erased( _dst_ptr: *mut DynPixel, _stride: ptrdiff_t, From afc7b7398135e2c5ac1718b5174772a76c4e824d Mon Sep 17 00:00:00 2001 From: Nicole LeGare Date: Fri, 21 Jun 2024 14:11:28 -0700 Subject: [PATCH 07/17] `cdef_apply.rs`: Deny unsafe --- src/cdef_apply.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cdef_apply.rs b/src/cdef_apply.rs index 8c4102625..c0e9ec444 100644 --- a/src/cdef_apply.rs +++ b/src/cdef_apply.rs @@ -1,3 +1,5 @@ +#![deny(unsafe_code)] + use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::BPC; use crate::include::common::intops::ulog2; From f0249a11b7adc193d1ff02eceb65fc383bf0200a Mon Sep 17 00:00:00 2001 From: Nicole LeGare Date: Mon, 24 Jun 2024 13:54:50 -0700 Subject: [PATCH 08/17] `cdef.rs`: Use `AlignedVec64` --- src/cdef.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/cdef.rs b/src/cdef.rs index 6a1e47620..541b84372 100644 --- a/src/cdef.rs +++ b/src/cdef.rs @@ -5,8 +5,7 @@ use crate::include::common::bitdepth::LeftPixelRow2px; use crate::include::common::intops::apply_sign; use crate::include::common::intops::iclip; use crate::include::dav1d::picture::Rav1dPictureDataComponentOffset; -use crate::src::align::Align64; -use crate::src::align::AlignedVec; +use crate::src::align::AlignedVec64; use crate::src::cpu::CpuFlags; use crate::src::disjoint_mut::DisjointMut; use crate::src::ffi_safe::FFISafe; @@ -53,14 +52,14 @@ wrap_fn_ptr!(pub unsafe extern "C" fn cdef( edges: CdefEdgeFlags, bitdepth_max: c_int, _dst: *const FFISafe, - _top: *const FFISafe>>>, + _top: *const FFISafe>>, _bottom: *const FFISafe>, ) -> ()); #[derive(Clone, Copy)] pub enum CdefBottom<'a> { Pic(Rav1dPictureDataComponentOffset<'a>), - LineBuf((&'a DisjointMut>>, usize)), + LineBuf((&'a DisjointMut>, usize)), } impl cdef::Fn { @@ -73,7 +72,7 @@ impl cdef::Fn { &self, dst: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - (top, top_off): (&DisjointMut>>, usize), + (top, top_off): (&DisjointMut>, usize), bottom: CdefBottom<'_>, pri_strength: c_int, sec_strength: u8, @@ -175,7 +174,7 @@ fn padding( tmp: &mut [i16; TMP_STRIDE * TMP_STRIDE], src: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - (top, top_off): (&DisjointMut>>, usize), + (top, top_off): (&DisjointMut>, usize), mut bottom: CdefBottom<'_>, w: usize, h: usize, @@ -250,7 +249,7 @@ fn padding( fn cdef_filter_block_rust( dst: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - top: (&DisjointMut>>, usize), + top: (&DisjointMut>, usize), bottom: CdefBottom<'_>, pri_strength: c_int, sec_strength: c_int, @@ -395,7 +394,7 @@ unsafe extern "C" fn cdef_filter_block_c_erased, - top: *const FFISafe>>>, + top: *const FFISafe>>, bottom: *const FFISafe>, ) { // SAFETY: Was passed as `FFISafe::new(_)` in `cdef_dir::Fn::call`. @@ -566,7 +565,7 @@ unsafe extern "C" fn cdef_filter_neon_erased< edges: CdefEdgeFlags, bitdepth_max: c_int, _dst: *const FFISafe, - _top: *const FFISafe>>>, + _top: *const FFISafe>>, _bottom: *const FFISafe>, ) { use crate::src::align::Align16; From 04206006d269f150ef4d26381342bba28ebbdd0c Mon Sep 17 00:00:00 2001 From: Nicole LeGare Date: Mon, 24 Jun 2024 15:21:04 -0700 Subject: [PATCH 09/17] `CdefBottom`: Remove offset --- src/cdef.rs | 58 +++++++++++++++++++++++++++-------------------- src/cdef_apply.rs | 26 +++++++++++++-------- 2 files changed, 50 insertions(+), 34 deletions(-) diff --git a/src/cdef.rs b/src/cdef.rs index 541b84372..cd152e3b2 100644 --- a/src/cdef.rs +++ b/src/cdef.rs @@ -4,6 +4,7 @@ use crate::include::common::bitdepth::DynPixel; use crate::include::common::bitdepth::LeftPixelRow2px; use crate::include::common::intops::apply_sign; use crate::include::common::intops::iclip; +use crate::include::dav1d::picture::Rav1dPictureDataComponent; use crate::include::dav1d::picture::Rav1dPictureDataComponentOffset; use crate::src::align::AlignedVec64; use crate::src::cpu::CpuFlags; @@ -58,8 +59,8 @@ wrap_fn_ptr!(pub unsafe extern "C" fn cdef( #[derive(Clone, Copy)] pub enum CdefBottom<'a> { - Pic(Rav1dPictureDataComponentOffset<'a>), - LineBuf((&'a DisjointMut>, usize)), + Pic(&'a Rav1dPictureDataComponent), + LineBuf(&'a DisjointMut>), } impl cdef::Fn { @@ -72,8 +73,10 @@ impl cdef::Fn { &self, dst: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - (top, top_off): (&DisjointMut>, usize), + top: &DisjointMut>, + top_off: usize, bottom: CdefBottom<'_>, + bottom_off: usize, pri_strength: c_int, sec_strength: u8, dir: c_int, @@ -86,10 +89,8 @@ impl cdef::Fn { let left = ptr::from_ref(left).cast(); let top_ptr = (&*top.element_as(top_off) as *const BD::Pixel).cast(); let bottom_ptr = match bottom { - CdefBottom::Pic(bot) => bot.as_ptr::().cast(), - CdefBottom::LineBuf((bot, bot_off)) => { - (&*bot.element_as(bot_off) as *const BD::Pixel).cast() - } + CdefBottom::Pic(pic) => pic.as_ptr_at::(bottom_off).cast(), + CdefBottom::LineBuf(buf) => (&*buf.element_as(bottom_off) as *const BD::Pixel).cast(), }; let top = FFISafe::new(top); let bottom = FFISafe::new(&bottom); @@ -174,17 +175,16 @@ fn padding( tmp: &mut [i16; TMP_STRIDE * TMP_STRIDE], src: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - (top, top_off): (&DisjointMut>, usize), - mut bottom: CdefBottom<'_>, + top: &DisjointMut>, + top_off: usize, + bottom: CdefBottom<'_>, + bottom_off: usize, w: usize, h: usize, edges: CdefEdgeFlags, ) { let top_off = top_off - 2; - match &mut bottom { - CdefBottom::Pic(pic) => *pic -= 2usize, - CdefBottom::LineBuf((_, offset)) => *offset -= 2, - } + let bottom_off = bottom_off - 2; let stride = src.pixel_stride::(); // Fill extended input buffer. @@ -231,13 +231,10 @@ fn padding( } for (i, y) in (h + 2..y_end).enumerate() { let tmp = &mut tmp[y * TMP_STRIDE..]; - let offset = i as isize * stride; + let bottom_off = bottom_off.wrapping_add_signed(i as isize * stride); let bottom = match bottom { - CdefBottom::Pic(pic) => &*(pic + offset).slice::(x_end), - CdefBottom::LineBuf((buf, off)) => { - let offset = off.wrapping_add_signed(offset); - &*buf.slice_as((offset.., ..x_end)) - } + CdefBottom::Pic(pic) => &*pic.slice::((bottom_off.., ..x_end)), + CdefBottom::LineBuf(buf) => &*buf.slice_as((bottom_off.., ..x_end)), }; for x in x_start..x_end { tmp[x] = bottom[x].as_::(); @@ -249,8 +246,10 @@ fn padding( fn cdef_filter_block_rust( dst: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - top: (&DisjointMut>, usize), + top: &DisjointMut>, + top_off: usize, bottom: CdefBottom<'_>, + bottom_off: usize, pri_strength: c_int, sec_strength: c_int, dir: c_int, @@ -265,7 +264,9 @@ fn cdef_filter_block_rust( assert!((w == 4 || w == 8) && (h == 4 || h == 8)); let mut tmp = [0; TMP_STRIDE * TMP_STRIDE]; // `12 * 12` is the maximum value of `TMP_STRIDE * (h + 4)`. - padding::(&mut tmp, dst, left, top, bottom, w, h, edges); + padding::( + &mut tmp, dst, left, top, top_off, bottom, bottom_off, w, h, edges, + ); let tmp = tmp; let tmp_offset = 2 * TMP_STRIDE + 2; @@ -386,7 +387,7 @@ unsafe extern "C" fn cdef_filter_block_c_erased; 8], top_ptr: *const DynPixel, - _bottom_ptr: *const DynPixel, + bottom_ptr: *const DynPixel, pri_strength: c_int, sec_strength: c_int, dir: c_int, @@ -404,17 +405,26 @@ unsafe extern "C" fn cdef_filter_block_c_erased().cast_const(); - // SAFETY: Reverse of what was done ine `cdef::Fn::call`. `top_ptr` is + // SAFETY: Reverse of what was done in `cdef::Fn::call`. `top_ptr` is // derived from `top` and so is safe to calculate the offset from. let top_off = unsafe { top_ptr.cast::().offset_from(top_base) } as usize; // SAFETY: Was passed as `FFISafe::new(_)` in `cdef::Fn::call`. let bottom = *unsafe { FFISafe::get(bottom) }; + let bottom_base = match bottom { + CdefBottom::Pic(pic) => pic.as_ptr::(), + CdefBottom::LineBuf(buf) => buf.as_mut_ptr().cast::().cast_const(), + }; + // SAFETY: Reverse of what was done in `cdef::Fn::call`. `bottom_ptr` is + // derived from `bottom` and so is safe to calculate the offset from. + let bottom_off = unsafe { bottom_ptr.cast::().offset_from(bottom_base) } as usize; let bd = BD::from_c(bitdepth_max); cdef_filter_block_rust( dst, left, - (top, top_off), + top, + top_off, bottom, + bottom_off, pri_strength, sec_strength, dir, diff --git a/src/cdef_apply.rs b/src/cdef_apply.rs index c0e9ec444..d973c851a 100644 --- a/src/cdef_apply.rs +++ b/src/cdef_apply.rs @@ -284,7 +284,7 @@ pub(crate) fn rav1d_cdef_brow( ) }; let bottom = bptrs[0] + (8 * y_stride); - Some((top, CdefBottom::Pic(bottom))) + Some((top, CdefBottom::Pic(bottom.data), bottom.offset)) } else if !sbrow_start && by + 2 >= by_end { let top = ( &f.lf.cdef_line_buf, @@ -292,7 +292,7 @@ pub(crate) fn rav1d_cdef_brow( (sby * 4) as isize * y_stride + (bx * 4) as isize, ), ); - let bottom = if resize { + let (buf, offset) = if resize { ( &f.lf.cdef_line_buf, f.lf.cdef_lpf_line[0].wrapping_add_signed( @@ -308,12 +308,12 @@ pub(crate) fn rav1d_cdef_brow( ), ) }; - Some((top, CdefBottom::LineBuf(bottom))) + Some((top, CdefBottom::LineBuf(buf), offset)) } else { None }; - let (top, bot) = top_bot.unwrap_or_else(|| { + let ((top, top_off), bot, bot_off) = top_bot.unwrap_or_else(|| { let top = ( &f.lf.cdef_line_buf, f.lf.cdef_line[tf as usize][0].wrapping_add_signed( @@ -322,7 +322,7 @@ pub(crate) fn rav1d_cdef_brow( ), ); let bottom = bptrs[0] + (8 * y_stride); - (top, CdefBottom::Pic(bottom)) + (top, CdefBottom::Pic(bottom.data), bottom.offset) }); if y_pri_lvl != 0 { @@ -332,7 +332,9 @@ pub(crate) fn rav1d_cdef_brow( bptrs[0], &lr_bak[bit as usize][0], top, + top_off, bot, + bot_off, adj_y_pri_lvl, y_sec_lvl, dir, @@ -346,7 +348,9 @@ pub(crate) fn rav1d_cdef_brow( bptrs[0], &lr_bak[bit as usize][0], top, + top_off, bot, + bot_off, 0, y_sec_lvl, 0, @@ -387,7 +391,7 @@ pub(crate) fn rav1d_cdef_brow( ) }; let bottom = bptrs[pl] + ((8 >> ss_ver) * uv_stride); - Some((top, CdefBottom::Pic(bottom))) + Some((top, CdefBottom::Pic(bottom.data), bottom.offset)) } else if !sbrow_start && by + 2 >= by_end { let top = ( &f.lf.cdef_line_buf, @@ -396,7 +400,7 @@ pub(crate) fn rav1d_cdef_brow( + (bx * 4 >> ss_hor) as isize, ), ); - let bottom = if resize { + let (buf, offset) = if resize { ( &f.lf.cdef_line_buf, f.lf.cdef_lpf_line[pl].wrapping_add_signed( @@ -415,12 +419,12 @@ pub(crate) fn rav1d_cdef_brow( ), ) }; - Some((top, CdefBottom::LineBuf(bottom))) + Some((top, CdefBottom::LineBuf(buf), offset)) } else { None }; - let (top, bot) = top_bot.unwrap_or_else(|| { + let ((top, top_off), bot, bot_off) = top_bot.unwrap_or_else(|| { let top = ( &f.lf.cdef_line_buf, f.lf.cdef_line[tf as usize][pl].wrapping_add_signed( @@ -429,14 +433,16 @@ pub(crate) fn rav1d_cdef_brow( ), ); let bottom = bptrs[pl] + ((8 >> ss_ver) * uv_stride); - (top, CdefBottom::Pic(bottom)) + (top, CdefBottom::Pic(bottom.data), bottom.offset) }); f.dsp.cdef.fb[uv_idx as usize].call::( bptrs[pl], &lr_bak[bit as usize][pl], top, + top_off, bot, + bot_off, uv_pri_lvl.into(), uv_sec_lvl, uvdir, From 21812fb4b53e74dbfb7d09ce06b6ba716b6fc8e7 Mon Sep 17 00:00:00 2001 From: Nicole LeGare Date: Mon, 24 Jun 2024 16:11:13 -0700 Subject: [PATCH 10/17] `rav1d_cdef_brow`: Remove unnecessary type casts --- src/cdef_apply.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/cdef_apply.rs b/src/cdef_apply.rs index d973c851a..46332ff9a 100644 --- a/src/cdef_apply.rs +++ b/src/cdef_apply.rs @@ -278,7 +278,7 @@ pub(crate) fn rav1d_cdef_brow( ( &f.lf.lr_line_buf, f.lf.lr_lpf_line[0].wrapping_add_signed( - (sby * ((4 as c_int) << sb128) - 4) as isize * y_stride + (sby * (4 << sb128) - 4) as isize * y_stride + (bx * 4) as isize, ), ) @@ -300,7 +300,7 @@ pub(crate) fn rav1d_cdef_brow( ), ) } else { - let line = sby * ((4 as c_int) << sb128) + 4 * sb128 as c_int + 2; + let line = sby * (4 << sb128) + 4 * sb128 as c_int + 2; ( &f.lf.lr_line_buf, f.lf.lr_lpf_line[0].wrapping_add_signed( @@ -381,7 +381,7 @@ pub(crate) fn rav1d_cdef_brow( ), ) } else { - let line = sby * ((4 as c_int) << sb128) - 4; + let line = sby * (4 << sb128) - 4; ( &f.lf.lr_line_buf, f.lf.lr_lpf_line[pl].wrapping_add_signed( @@ -409,8 +409,7 @@ pub(crate) fn rav1d_cdef_brow( ), ) } else { - let line = - sby * ((4 as c_int) << sb128) + 4 * sb128 as c_int + 2; + let line = sby * (4 << sb128) + 4 * sb128 as c_int + 2; ( &f.lf.lr_line_buf, f.lf.lr_lpf_line[pl].wrapping_add_signed( From 7d4c894a8fe13aab0ebf5ff339e13e8400f0b01c Mon Sep 17 00:00:00 2001 From: Nicole LeGare Date: Mon, 24 Jun 2024 16:14:28 -0700 Subject: [PATCH 11/17] `padding`: Add note about matching on `bottom` --- src/cdef.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cdef.rs b/src/cdef.rs index cd152e3b2..614413891 100644 --- a/src/cdef.rs +++ b/src/cdef.rs @@ -232,6 +232,8 @@ fn padding( for (i, y) in (h + 2..y_end).enumerate() { let tmp = &mut tmp[y * TMP_STRIDE..]; let bottom_off = bottom_off.wrapping_add_signed(i as isize * stride); + // This is a fallback `fn`, so perf is not as important here, so an extra branch + // here should be okay. let bottom = match bottom { CdefBottom::Pic(pic) => &*pic.slice::((bottom_off.., ..x_end)), CdefBottom::LineBuf(buf) => &*buf.slice_as((bottom_off.., ..x_end)), From c5ab41f17cfdae096bf20cbfefc234abcb55d61c Mon Sep 17 00:00:00 2001 From: Nicole LeGare Date: Mon, 8 Jul 2024 10:41:19 -0700 Subject: [PATCH 12/17] Use `Pixels` trait --- src/cdef.rs | 2 ++ src/pixels.rs | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cdef.rs b/src/cdef.rs index 614413891..d7d18cacb 100644 --- a/src/cdef.rs +++ b/src/cdef.rs @@ -19,6 +19,7 @@ use std::cmp; use std::ffi::c_int; use std::ffi::c_uint; use std::ptr; +use crate::src::pixels::Pixels; #[cfg(all( feature = "asm", @@ -29,6 +30,7 @@ use crate::include::common::bitdepth::bd_fn; #[cfg(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64")))] use crate::include::common::bitdepth::{bpc_fn, BPC}; + bitflags! { #[repr(transparent)] #[derive(Clone, Copy)] diff --git a/src/pixels.rs b/src/pixels.rs index 5b04e1d7a..5c17f6b65 100644 --- a/src/pixels.rs +++ b/src/pixels.rs @@ -24,7 +24,7 @@ pub trait Pixels { } /// Absolute ptr to [`BitDepth::Pixel`]s. - fn _as_ptr(&self) -> *const BD::Pixel { + fn as_ptr(&self) -> *const BD::Pixel { self.as_mut_ptr::().cast_const() } From bb94c7fee95aae67d465169159179b3cf4600ef2 Mon Sep 17 00:00:00 2001 From: Nicole LeGare Date: Mon, 8 Jul 2024 13:14:07 -0700 Subject: [PATCH 13/17] Make `CdefBottom` a `PicOrBuf` --- src/cdef.rs | 24 ++++++++---------------- src/cdef_apply.rs | 29 ++++++++++++++++++++++------- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/src/cdef.rs b/src/cdef.rs index d7d18cacb..95ac24fea 100644 --- a/src/cdef.rs +++ b/src/cdef.rs @@ -4,12 +4,13 @@ use crate::include::common::bitdepth::DynPixel; use crate::include::common::bitdepth::LeftPixelRow2px; use crate::include::common::intops::apply_sign; use crate::include::common::intops::iclip; -use crate::include::dav1d::picture::Rav1dPictureDataComponent; use crate::include::dav1d::picture::Rav1dPictureDataComponentOffset; use crate::src::align::AlignedVec64; use crate::src::cpu::CpuFlags; use crate::src::disjoint_mut::DisjointMut; use crate::src::ffi_safe::FFISafe; +use crate::src::pic_or_buf::PicOrBuf; +use crate::src::pixels::Pixels; use crate::src::strided::Strided as _; use crate::src::tables::dav1d_cdef_directions; use crate::src::wrap_fn_ptr::wrap_fn_ptr; @@ -19,7 +20,6 @@ use std::cmp; use std::ffi::c_int; use std::ffi::c_uint; use std::ptr; -use crate::src::pixels::Pixels; #[cfg(all( feature = "asm", @@ -30,7 +30,6 @@ use crate::include::common::bitdepth::bd_fn; #[cfg(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64")))] use crate::include::common::bitdepth::{bpc_fn, BPC}; - bitflags! { #[repr(transparent)] #[derive(Clone, Copy)] @@ -59,11 +58,7 @@ wrap_fn_ptr!(pub unsafe extern "C" fn cdef( _bottom: *const FFISafe>, ) -> ()); -#[derive(Clone, Copy)] -pub enum CdefBottom<'a> { - Pic(&'a Rav1dPictureDataComponent), - LineBuf(&'a DisjointMut>), -} +pub type CdefBottom<'a> = PicOrBuf<'a, AlignedVec64>; impl cdef::Fn { /// CDEF operates entirely on pre-filter data. @@ -91,8 +86,8 @@ impl cdef::Fn { let left = ptr::from_ref(left).cast(); let top_ptr = (&*top.element_as(top_off) as *const BD::Pixel).cast(); let bottom_ptr = match bottom { - CdefBottom::Pic(pic) => pic.as_ptr_at::(bottom_off).cast(), - CdefBottom::LineBuf(buf) => (&*buf.element_as(bottom_off) as *const BD::Pixel).cast(), + PicOrBuf::Pic(pic) => pic.as_ptr_at::(bottom_off).cast(), + PicOrBuf::Buf(buf) => (&*buf.element_as(bottom_off) as *const BD::Pixel).cast(), }; let top = FFISafe::new(top); let bottom = FFISafe::new(&bottom); @@ -237,8 +232,8 @@ fn padding( // This is a fallback `fn`, so perf is not as important here, so an extra branch // here should be okay. let bottom = match bottom { - CdefBottom::Pic(pic) => &*pic.slice::((bottom_off.., ..x_end)), - CdefBottom::LineBuf(buf) => &*buf.slice_as((bottom_off.., ..x_end)), + PicOrBuf::Pic(pic) => &*pic.slice::((bottom_off.., ..x_end)), + PicOrBuf::Buf(buf) => &*buf.slice_as((bottom_off.., ..x_end)), }; for x in x_start..x_end { tmp[x] = bottom[x].as_::(); @@ -414,10 +409,7 @@ unsafe extern "C" fn cdef_filter_block_c_erased().offset_from(top_base) } as usize; // SAFETY: Was passed as `FFISafe::new(_)` in `cdef::Fn::call`. let bottom = *unsafe { FFISafe::get(bottom) }; - let bottom_base = match bottom { - CdefBottom::Pic(pic) => pic.as_ptr::(), - CdefBottom::LineBuf(buf) => buf.as_mut_ptr().cast::().cast_const(), - }; + let bottom_base = bottom.as_ptr::(); // SAFETY: Reverse of what was done in `cdef::Fn::call`. `bottom_ptr` is // derived from `bottom` and so is safe to calculate the offset from. let bottom_off = unsafe { bottom_ptr.cast::().offset_from(bottom_base) } as usize; diff --git a/src/cdef_apply.rs b/src/cdef_apply.rs index 46332ff9a..e6ad0644a 100644 --- a/src/cdef_apply.rs +++ b/src/cdef_apply.rs @@ -7,13 +7,14 @@ use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::include::dav1d::picture::Rav1dPictureDataComponentOffset; use crate::src::align::Align16; use crate::src::align::AlignedVec64; -use crate::src::cdef::CdefBottom; use crate::src::cdef::CdefEdgeFlags; use crate::src::disjoint_mut::DisjointMut; use crate::src::internal::Rav1dContext; use crate::src::internal::Rav1dFrameData; use crate::src::internal::Rav1dTaskContext; +use crate::src::pic_or_buf::PicOrBuf; use crate::src::strided::Strided as _; +use crate::src::strided::WithStride; use bitflags::bitflags; use libc::ptrdiff_t; use std::cmp; @@ -284,7 +285,7 @@ pub(crate) fn rav1d_cdef_brow( ) }; let bottom = bptrs[0] + (8 * y_stride); - Some((top, CdefBottom::Pic(bottom.data), bottom.offset)) + Some((top, PicOrBuf::Pic(bottom.data), bottom.offset)) } else if !sbrow_start && by + 2 >= by_end { let top = ( &f.lf.cdef_line_buf, @@ -308,7 +309,14 @@ pub(crate) fn rav1d_cdef_brow( ), ) }; - Some((top, CdefBottom::LineBuf(buf), offset)) + Some(( + top, + PicOrBuf::Buf(WithStride { + buf, + stride: y_stride, + }), + offset, + )) } else { None }; @@ -322,7 +330,7 @@ pub(crate) fn rav1d_cdef_brow( ), ); let bottom = bptrs[0] + (8 * y_stride); - (top, CdefBottom::Pic(bottom.data), bottom.offset) + (top, PicOrBuf::Pic(bottom.data), bottom.offset) }); if y_pri_lvl != 0 { @@ -391,7 +399,7 @@ pub(crate) fn rav1d_cdef_brow( ) }; let bottom = bptrs[pl] + ((8 >> ss_ver) * uv_stride); - Some((top, CdefBottom::Pic(bottom.data), bottom.offset)) + Some((top, PicOrBuf::Pic(bottom.data), bottom.offset)) } else if !sbrow_start && by + 2 >= by_end { let top = ( &f.lf.cdef_line_buf, @@ -418,7 +426,14 @@ pub(crate) fn rav1d_cdef_brow( ), ) }; - Some((top, CdefBottom::LineBuf(buf), offset)) + Some(( + top, + PicOrBuf::Buf(WithStride { + buf, + stride: uv_stride, + }), + offset, + )) } else { None }; @@ -432,7 +447,7 @@ pub(crate) fn rav1d_cdef_brow( ), ); let bottom = bptrs[pl] + ((8 >> ss_ver) * uv_stride); - (top, CdefBottom::Pic(bottom.data), bottom.offset) + (top, PicOrBuf::Pic(bottom.data), bottom.offset) }); f.dsp.cdef.fb[uv_idx as usize].call::( From 57ef1c1a96a4f4ab0143fad9930d2618db4074c7 Mon Sep 17 00:00:00 2001 From: Nicole LeGare Date: Mon, 8 Jul 2024 15:29:56 -0700 Subject: [PATCH 14/17] `cdef::Fn::call`: Use `WithOffset` for `top` and `bottom` args --- src/cdef.rs | 68 +++++++++----------------- src/cdef_apply.rs | 121 +++++++++++++++++++++++----------------------- 2 files changed, 85 insertions(+), 104 deletions(-) diff --git a/src/cdef.rs b/src/cdef.rs index 95ac24fea..dcd8133c1 100644 --- a/src/cdef.rs +++ b/src/cdef.rs @@ -10,9 +10,9 @@ use crate::src::cpu::CpuFlags; use crate::src::disjoint_mut::DisjointMut; use crate::src::ffi_safe::FFISafe; use crate::src::pic_or_buf::PicOrBuf; -use crate::src::pixels::Pixels; use crate::src::strided::Strided as _; use crate::src::tables::dav1d_cdef_directions; +use crate::src::with_offset::WithOffset; use crate::src::wrap_fn_ptr::wrap_fn_ptr; use bitflags::bitflags; use libc::ptrdiff_t; @@ -54,11 +54,12 @@ wrap_fn_ptr!(pub unsafe extern "C" fn cdef( edges: CdefEdgeFlags, bitdepth_max: c_int, _dst: *const FFISafe, - _top: *const FFISafe>>, + _top: *const FFISafe>, _bottom: *const FFISafe>, ) -> ()); -pub type CdefBottom<'a> = PicOrBuf<'a, AlignedVec64>; +pub type CdefTop<'a> = WithOffset<&'a DisjointMut>>; +pub type CdefBottom<'a> = WithOffset>>; impl cdef::Fn { /// CDEF operates entirely on pre-filter data. @@ -70,10 +71,8 @@ impl cdef::Fn { &self, dst: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - top: &DisjointMut>, - top_off: usize, + top: CdefTop<'_>, bottom: CdefBottom<'_>, - bottom_off: usize, pri_strength: c_int, sec_strength: u8, dir: c_int, @@ -84,12 +83,9 @@ impl cdef::Fn { let dst_ptr = dst.as_mut_ptr::().cast(); let stride = dst.stride(); let left = ptr::from_ref(left).cast(); - let top_ptr = (&*top.element_as(top_off) as *const BD::Pixel).cast(); - let bottom_ptr = match bottom { - PicOrBuf::Pic(pic) => pic.as_ptr_at::(bottom_off).cast(), - PicOrBuf::Buf(buf) => (&*buf.element_as(bottom_off) as *const BD::Pixel).cast(), - }; - let top = FFISafe::new(top); + let top_ptr = top.as_ptr::().cast(); + let bottom_ptr = bottom.as_ptr::().cast(); + let top = FFISafe::new(&top); let bottom = FFISafe::new(&bottom); let sec_strength = sec_strength as c_int; let damping = damping as c_int; @@ -172,16 +168,14 @@ fn padding( tmp: &mut [i16; TMP_STRIDE * TMP_STRIDE], src: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - top: &DisjointMut>, - top_off: usize, + top: CdefTop<'_>, bottom: CdefBottom<'_>, - bottom_off: usize, w: usize, h: usize, edges: CdefEdgeFlags, ) { - let top_off = top_off - 2; - let bottom_off = bottom_off - 2; + let top = top - 2_usize; + let bottom = bottom - 2_usize; let stride = src.pixel_stride::(); // Fill extended input buffer. @@ -207,8 +201,8 @@ fn padding( } for (i, y) in (y_start..2).enumerate() { - let offset = top_off.wrapping_add_signed(i as isize * stride); - let top = top.slice_as::<_, BD::Pixel>((offset.., ..x_end)); + let top = top + i as isize * stride; + let top = top.data.slice_as::<_, BD::Pixel>((top.offset.., ..x_end)); for x in x_start..x_end { tmp[x + y * TMP_STRIDE] = top[x].as_::(); } @@ -228,12 +222,12 @@ fn padding( } for (i, y) in (h + 2..y_end).enumerate() { let tmp = &mut tmp[y * TMP_STRIDE..]; - let bottom_off = bottom_off.wrapping_add_signed(i as isize * stride); + let bottom = bottom + i as isize * stride; // This is a fallback `fn`, so perf is not as important here, so an extra branch // here should be okay. - let bottom = match bottom { - PicOrBuf::Pic(pic) => &*pic.slice::((bottom_off.., ..x_end)), - PicOrBuf::Buf(buf) => &*buf.slice_as((bottom_off.., ..x_end)), + let bottom = match bottom.data { + PicOrBuf::Pic(pic) => &*pic.slice::((bottom.offset.., ..x_end)), + PicOrBuf::Buf(buf) => &*buf.slice_as((bottom.offset.., ..x_end)), }; for x in x_start..x_end { tmp[x] = bottom[x].as_::(); @@ -245,10 +239,8 @@ fn padding( fn cdef_filter_block_rust( dst: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - top: &DisjointMut>, - top_off: usize, + top: CdefTop<'_>, bottom: CdefBottom<'_>, - bottom_off: usize, pri_strength: c_int, sec_strength: c_int, dir: c_int, @@ -263,9 +255,7 @@ fn cdef_filter_block_rust( assert!((w == 4 || w == 8) && (h == 4 || h == 8)); let mut tmp = [0; TMP_STRIDE * TMP_STRIDE]; // `12 * 12` is the maximum value of `TMP_STRIDE * (h + 4)`. - padding::( - &mut tmp, dst, left, top, top_off, bottom, bottom_off, w, h, edges, - ); + padding::(&mut tmp, dst, left, top, bottom, w, h, edges); let tmp = tmp; let tmp_offset = 2 * TMP_STRIDE + 2; @@ -385,8 +375,8 @@ unsafe extern "C" fn cdef_filter_block_c_erased; 8], - top_ptr: *const DynPixel, - bottom_ptr: *const DynPixel, + _top_ptr: *const DynPixel, + _bottom_ptr: *const DynPixel, pri_strength: c_int, sec_strength: c_int, dir: c_int, @@ -394,7 +384,7 @@ unsafe extern "C" fn cdef_filter_block_c_erased, - top: *const FFISafe>>, + top: *const FFISafe>, bottom: *const FFISafe>, ) { // SAFETY: Was passed as `FFISafe::new(_)` in `cdef_dir::Fn::call`. @@ -402,25 +392,15 @@ unsafe extern "C" fn cdef_filter_block_c_erased().cast_const(); - // SAFETY: Reverse of what was done in `cdef::Fn::call`. `top_ptr` is - // derived from `top` and so is safe to calculate the offset from. - let top_off = unsafe { top_ptr.cast::().offset_from(top_base) } as usize; + let top = *unsafe { FFISafe::get(top) }; // SAFETY: Was passed as `FFISafe::new(_)` in `cdef::Fn::call`. let bottom = *unsafe { FFISafe::get(bottom) }; - let bottom_base = bottom.as_ptr::(); - // SAFETY: Reverse of what was done in `cdef::Fn::call`. `bottom_ptr` is - // derived from `bottom` and so is safe to calculate the offset from. - let bottom_off = unsafe { bottom_ptr.cast::().offset_from(bottom_base) } as usize; let bd = BD::from_c(bitdepth_max); cdef_filter_block_rust( dst, left, top, - top_off, bottom, - bottom_off, pri_strength, sec_strength, dir, @@ -571,7 +551,7 @@ unsafe extern "C" fn cdef_filter_neon_erased< edges: CdefEdgeFlags, bitdepth_max: c_int, _dst: *const FFISafe, - _top: *const FFISafe>>, + _top: *const FFISafe>, _bottom: *const FFISafe>, ) { use crate::src::align::Align16; diff --git a/src/cdef_apply.rs b/src/cdef_apply.rs index e6ad0644a..9986a0a1d 100644 --- a/src/cdef_apply.rs +++ b/src/cdef_apply.rs @@ -15,6 +15,7 @@ use crate::src::internal::Rav1dTaskContext; use crate::src::pic_or_buf::PicOrBuf; use crate::src::strided::Strided as _; use crate::src::strided::WithStride; +use crate::src::with_offset::WithOffset; use bitflags::bitflags; use libc::ptrdiff_t; use std::cmp; @@ -269,30 +270,30 @@ pub(crate) fn rav1d_cdef_brow( None } else if sbrow_start && by == by_start { let top = if resize { - ( - &f.lf.cdef_line_buf, - f.lf.cdef_lpf_line[0].wrapping_add_signed( + WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_lpf_line[0].wrapping_add_signed( ((sby - 1) * 4) as isize * y_stride + (bx * 4) as isize, ), - ) + } } else { - ( - &f.lf.lr_line_buf, - f.lf.lr_lpf_line[0].wrapping_add_signed( + WithOffset { + data: &f.lf.lr_line_buf, + offset: f.lf.lr_lpf_line[0].wrapping_add_signed( (sby * (4 << sb128) - 4) as isize * y_stride + (bx * 4) as isize, ), - ) + } }; let bottom = bptrs[0] + (8 * y_stride); - Some((top, PicOrBuf::Pic(bottom.data), bottom.offset)) + Some((top, WithOffset::pic(bottom))) } else if !sbrow_start && by + 2 >= by_end { - let top = ( - &f.lf.cdef_line_buf, - f.lf.cdef_line[tf as usize][0].wrapping_add_signed( + let top = WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_line[tf as usize][0].wrapping_add_signed( (sby * 4) as isize * y_stride + (bx * 4) as isize, ), - ); + }; let (buf, offset) = if resize { ( &f.lf.cdef_line_buf, @@ -311,26 +312,28 @@ pub(crate) fn rav1d_cdef_brow( }; Some(( top, - PicOrBuf::Buf(WithStride { - buf, - stride: y_stride, - }), - offset, + WithOffset { + data: PicOrBuf::Buf(WithStride { + buf, + stride: y_stride, + }), + offset, + }, )) } else { None }; - let ((top, top_off), bot, bot_off) = top_bot.unwrap_or_else(|| { - let top = ( - &f.lf.cdef_line_buf, - f.lf.cdef_line[tf as usize][0].wrapping_add_signed( + let (top, bot) = top_bot.unwrap_or_else(|| { + let top = WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_line[tf as usize][0].wrapping_add_signed( have_tt as isize * (sby * 4) as isize * y_stride + (bx * 4) as isize, ), - ); + }; let bottom = bptrs[0] + (8 * y_stride); - (top, PicOrBuf::Pic(bottom.data), bottom.offset) + (top, WithOffset::pic(bottom)) }); if y_pri_lvl != 0 { @@ -340,9 +343,7 @@ pub(crate) fn rav1d_cdef_brow( bptrs[0], &lr_bak[bit as usize][0], top, - top_off, bot, - bot_off, adj_y_pri_lvl, y_sec_lvl, dir, @@ -356,9 +357,7 @@ pub(crate) fn rav1d_cdef_brow( bptrs[0], &lr_bak[bit as usize][0], top, - top_off, bot, - bot_off, 0, y_sec_lvl, 0, @@ -381,33 +380,34 @@ pub(crate) fn rav1d_cdef_brow( None } else if sbrow_start && by == by_start { let top = if resize { - ( - &f.lf.cdef_line_buf, - f.lf.cdef_lpf_line[pl].wrapping_add_signed( + WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_lpf_line[pl].wrapping_add_signed( ((sby - 1) * 4) as isize * uv_stride + (bx * 4 >> ss_hor) as isize, ), - ) + } } else { let line = sby * (4 << sb128) - 4; - ( - &f.lf.lr_line_buf, - f.lf.lr_lpf_line[pl].wrapping_add_signed( + WithOffset { + data: &f.lf.lr_line_buf, + offset: f.lf.lr_lpf_line[pl].wrapping_add_signed( line as isize * uv_stride + (bx * 4 >> ss_hor) as isize, ), - ) + } }; let bottom = bptrs[pl] + ((8 >> ss_ver) * uv_stride); - Some((top, PicOrBuf::Pic(bottom.data), bottom.offset)) + Some((top, WithOffset::pic(bottom))) } else if !sbrow_start && by + 2 >= by_end { - let top = ( - &f.lf.cdef_line_buf, - f.lf.cdef_line[tf as usize][pl].wrapping_add_signed( - (sby * 8) as isize * uv_stride - + (bx * 4 >> ss_hor) as isize, - ), - ); + let top = WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_line[tf as usize][pl] + .wrapping_add_signed( + (sby * 8) as isize * uv_stride + + (bx * 4 >> ss_hor) as isize, + ), + }; let (buf, offset) = if resize { ( &f.lf.cdef_line_buf, @@ -428,35 +428,36 @@ pub(crate) fn rav1d_cdef_brow( }; Some(( top, - PicOrBuf::Buf(WithStride { - buf, - stride: uv_stride, - }), - offset, + WithOffset { + data: PicOrBuf::Buf(WithStride { + buf, + stride: uv_stride, + }), + offset, + }, )) } else { None }; - let ((top, top_off), bot, bot_off) = top_bot.unwrap_or_else(|| { - let top = ( - &f.lf.cdef_line_buf, - f.lf.cdef_line[tf as usize][pl].wrapping_add_signed( - have_tt as isize * (sby * 8) as isize * uv_stride - + (bx * 4 >> ss_hor) as isize, - ), - ); + let (top, bot) = top_bot.unwrap_or_else(|| { + let top = WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_line[tf as usize][pl] + .wrapping_add_signed( + have_tt as isize * (sby * 8) as isize * uv_stride + + (bx * 4 >> ss_hor) as isize, + ), + }; let bottom = bptrs[pl] + ((8 >> ss_ver) * uv_stride); - (top, PicOrBuf::Pic(bottom.data), bottom.offset) + (top, WithOffset::pic(bottom)) }); f.dsp.cdef.fb[uv_idx as usize].call::( bptrs[pl], &lr_bak[bit as usize][pl], top, - top_off, bot, - bot_off, uv_pri_lvl.into(), uv_sec_lvl, uvdir, From cc0c99a909b82ecced022010e2c42838667821d7 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Wed, 10 Jul 2024 12:23:47 -0700 Subject: [PATCH 15/17] `struct Cdef{Top,Bottom}`: Elide unnecessary `<'_>` lifetimes in args. --- src/cdef.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/cdef.rs b/src/cdef.rs index dcd8133c1..d4847fd38 100644 --- a/src/cdef.rs +++ b/src/cdef.rs @@ -54,8 +54,8 @@ wrap_fn_ptr!(pub unsafe extern "C" fn cdef( edges: CdefEdgeFlags, bitdepth_max: c_int, _dst: *const FFISafe, - _top: *const FFISafe>, - _bottom: *const FFISafe>, + _top: *const FFISafe, + _bottom: *const FFISafe, ) -> ()); pub type CdefTop<'a> = WithOffset<&'a DisjointMut>>; @@ -71,8 +71,8 @@ impl cdef::Fn { &self, dst: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - top: CdefTop<'_>, - bottom: CdefBottom<'_>, + top: CdefTop, + bottom: CdefBottom, pri_strength: c_int, sec_strength: u8, dir: c_int, @@ -168,8 +168,8 @@ fn padding( tmp: &mut [i16; TMP_STRIDE * TMP_STRIDE], src: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - top: CdefTop<'_>, - bottom: CdefBottom<'_>, + top: CdefTop, + bottom: CdefBottom, w: usize, h: usize, edges: CdefEdgeFlags, @@ -239,8 +239,8 @@ fn padding( fn cdef_filter_block_rust( dst: Rav1dPictureDataComponentOffset, left: &[LeftPixelRow2px; 8], - top: CdefTop<'_>, - bottom: CdefBottom<'_>, + top: CdefTop, + bottom: CdefBottom, pri_strength: c_int, sec_strength: c_int, dir: c_int, @@ -384,8 +384,8 @@ unsafe extern "C" fn cdef_filter_block_c_erased, - top: *const FFISafe>, - bottom: *const FFISafe>, + top: *const FFISafe, + bottom: *const FFISafe, ) { // SAFETY: Was passed as `FFISafe::new(_)` in `cdef_dir::Fn::call`. let dst = *unsafe { FFISafe::get(dst) }; @@ -551,8 +551,8 @@ unsafe extern "C" fn cdef_filter_neon_erased< edges: CdefEdgeFlags, bitdepth_max: c_int, _dst: *const FFISafe, - _top: *const FFISafe>, - _bottom: *const FFISafe>, + _top: *const FFISafe, + _bottom: *const FFISafe, ) { use crate::src::align::Align16; From aacea5843cfbebf44bbc8b5df0d7b560d07267c0 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Wed, 10 Jul 2024 12:31:04 -0700 Subject: [PATCH 16/17] `mod cdef_apply`: Replace `.wrapping_add_signed`s in `WithOffset::offset`s with `+`. --- src/cdef_apply.rs | 64 ++++++++++++++++++----------------------------- 1 file changed, 24 insertions(+), 40 deletions(-) diff --git a/src/cdef_apply.rs b/src/cdef_apply.rs index 9986a0a1d..681a674c8 100644 --- a/src/cdef_apply.rs +++ b/src/cdef_apply.rs @@ -272,28 +272,24 @@ pub(crate) fn rav1d_cdef_brow( let top = if resize { WithOffset { data: &f.lf.cdef_line_buf, - offset: f.lf.cdef_lpf_line[0].wrapping_add_signed( - ((sby - 1) * 4) as isize * y_stride + (bx * 4) as isize, - ), - } + offset: f.lf.cdef_lpf_line[0], + } + ((sby - 1) * 4) as isize * y_stride + + (bx * 4) as isize } else { WithOffset { data: &f.lf.lr_line_buf, - offset: f.lf.lr_lpf_line[0].wrapping_add_signed( - (sby * (4 << sb128) - 4) as isize * y_stride - + (bx * 4) as isize, - ), - } + offset: f.lf.lr_lpf_line[0], + } + (sby * (4 << sb128) - 4) as isize * y_stride + + (bx * 4) as isize }; let bottom = bptrs[0] + (8 * y_stride); Some((top, WithOffset::pic(bottom))) } else if !sbrow_start && by + 2 >= by_end { let top = WithOffset { data: &f.lf.cdef_line_buf, - offset: f.lf.cdef_line[tf as usize][0].wrapping_add_signed( - (sby * 4) as isize * y_stride + (bx * 4) as isize, - ), - }; + offset: f.lf.cdef_line[tf as usize][0], + } + (sby * 4) as isize * y_stride + + (bx * 4) as isize; let (buf, offset) = if resize { ( &f.lf.cdef_line_buf, @@ -327,11 +323,9 @@ pub(crate) fn rav1d_cdef_brow( let (top, bot) = top_bot.unwrap_or_else(|| { let top = WithOffset { data: &f.lf.cdef_line_buf, - offset: f.lf.cdef_line[tf as usize][0].wrapping_add_signed( - have_tt as isize * (sby * 4) as isize * y_stride - + (bx * 4) as isize, - ), - }; + offset: f.lf.cdef_line[tf as usize][0], + } + have_tt as isize * (sby * 4) as isize * y_stride + + (bx * 4) as isize; let bottom = bptrs[0] + (8 * y_stride); (top, WithOffset::pic(bottom)) }); @@ -382,32 +376,25 @@ pub(crate) fn rav1d_cdef_brow( let top = if resize { WithOffset { data: &f.lf.cdef_line_buf, - offset: f.lf.cdef_lpf_line[pl].wrapping_add_signed( - ((sby - 1) * 4) as isize * uv_stride - + (bx * 4 >> ss_hor) as isize, - ), - } + offset: f.lf.cdef_lpf_line[pl], + } + ((sby - 1) * 4) as isize * uv_stride + + (bx * 4 >> ss_hor) as isize } else { let line = sby * (4 << sb128) - 4; WithOffset { data: &f.lf.lr_line_buf, - offset: f.lf.lr_lpf_line[pl].wrapping_add_signed( - line as isize * uv_stride - + (bx * 4 >> ss_hor) as isize, - ), - } + offset: f.lf.lr_lpf_line[pl], + } + line as isize * uv_stride + + (bx * 4 >> ss_hor) as isize }; let bottom = bptrs[pl] + ((8 >> ss_ver) * uv_stride); Some((top, WithOffset::pic(bottom))) } else if !sbrow_start && by + 2 >= by_end { let top = WithOffset { data: &f.lf.cdef_line_buf, - offset: f.lf.cdef_line[tf as usize][pl] - .wrapping_add_signed( - (sby * 8) as isize * uv_stride - + (bx * 4 >> ss_hor) as isize, - ), - }; + offset: f.lf.cdef_line[tf as usize][pl], + } + (sby * 8) as isize * uv_stride + + (bx * 4 >> ss_hor) as isize; let (buf, offset) = if resize { ( &f.lf.cdef_line_buf, @@ -443,12 +430,9 @@ pub(crate) fn rav1d_cdef_brow( let (top, bot) = top_bot.unwrap_or_else(|| { let top = WithOffset { data: &f.lf.cdef_line_buf, - offset: f.lf.cdef_line[tf as usize][pl] - .wrapping_add_signed( - have_tt as isize * (sby * 8) as isize * uv_stride - + (bx * 4 >> ss_hor) as isize, - ), - }; + offset: f.lf.cdef_line[tf as usize][pl], + } + have_tt as isize * (sby * 8) as isize * uv_stride + + (bx * 4 >> ss_hor) as isize; let bottom = bptrs[pl] + ((8 >> ss_ver) * uv_stride); (top, WithOffset::pic(bottom)) }); From 0c2f5e4f1fa6fb9b088a26ecc58490c02f722d1f Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Wed, 10 Jul 2024 12:44:47 -0700 Subject: [PATCH 17/17] `mod cdef_apply`: Replace `(buf, offset)` with `WithOffset`. --- src/cdef_apply.rs | 58 +++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/src/cdef_apply.rs b/src/cdef_apply.rs index 681a674c8..88a7ceec0 100644 --- a/src/cdef_apply.rs +++ b/src/cdef_apply.rs @@ -290,30 +290,28 @@ pub(crate) fn rav1d_cdef_brow( offset: f.lf.cdef_line[tf as usize][0], } + (sby * 4) as isize * y_stride + (bx * 4) as isize; - let (buf, offset) = if resize { - ( - &f.lf.cdef_line_buf, - f.lf.cdef_lpf_line[0].wrapping_add_signed( - (sby * 4 + 2) as isize * y_stride + (bx * 4) as isize, - ), - ) + let buf = if resize { + WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_lpf_line[0], + } + (sby * 4 + 2) as isize * y_stride + + (bx * 4) as isize } else { let line = sby * (4 << sb128) + 4 * sb128 as c_int + 2; - ( - &f.lf.lr_line_buf, - f.lf.lr_lpf_line[0].wrapping_add_signed( - line as isize * y_stride + (bx * 4) as isize, - ), - ) + WithOffset { + data: &f.lf.lr_line_buf, + offset: f.lf.lr_lpf_line[0], + } + line as isize * y_stride + + (bx * 4) as isize }; Some(( top, WithOffset { data: PicOrBuf::Buf(WithStride { - buf, + buf: buf.data, stride: y_stride, }), - offset, + offset: buf.offset, }, )) } else { @@ -395,32 +393,28 @@ pub(crate) fn rav1d_cdef_brow( offset: f.lf.cdef_line[tf as usize][pl], } + (sby * 8) as isize * uv_stride + (bx * 4 >> ss_hor) as isize; - let (buf, offset) = if resize { - ( - &f.lf.cdef_line_buf, - f.lf.cdef_lpf_line[pl].wrapping_add_signed( - (sby * 4 + 2) as isize * uv_stride - + (bx * 4 >> ss_hor) as isize, - ), - ) + let buf = if resize { + WithOffset { + data: &f.lf.cdef_line_buf, + offset: f.lf.cdef_lpf_line[pl], + } + (sby * 4 + 2) as isize * uv_stride + + (bx * 4 >> ss_hor) as isize } else { let line = sby * (4 << sb128) + 4 * sb128 as c_int + 2; - ( - &f.lf.lr_line_buf, - f.lf.lr_lpf_line[pl].wrapping_add_signed( - line as isize * uv_stride - + (bx * 4 >> ss_hor) as isize, - ), - ) + WithOffset { + data: &f.lf.lr_line_buf, + offset: f.lf.lr_lpf_line[pl], + } + line as isize * uv_stride + + (bx * 4 >> ss_hor) as isize }; Some(( top, WithOffset { data: PicOrBuf::Buf(WithStride { - buf, + buf: buf.data, stride: uv_stride, }), - offset, + offset: buf.offset, }, )) } else {