Skip to content

Commit

Permalink
cdef::Fn::call: Use WithOffset for top and bottom args
Browse files Browse the repository at this point in the history
  • Loading branch information
randomPoison authored and kkysen committed Jul 10, 2024
1 parent bb94c7f commit 57ef1c1
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 104 deletions.
68 changes: 24 additions & 44 deletions src/cdef.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ use crate::src::cpu::CpuFlags;
use crate::src::disjoint_mut::DisjointMut;
use crate::src::ffi_safe::FFISafe;
use crate::src::pic_or_buf::PicOrBuf;
use crate::src::pixels::Pixels;
use crate::src::strided::Strided as _;
use crate::src::tables::dav1d_cdef_directions;
use crate::src::with_offset::WithOffset;
use crate::src::wrap_fn_ptr::wrap_fn_ptr;
use bitflags::bitflags;
use libc::ptrdiff_t;
Expand Down Expand Up @@ -54,11 +54,12 @@ wrap_fn_ptr!(pub unsafe extern "C" fn cdef(
edges: CdefEdgeFlags,
bitdepth_max: c_int,
_dst: *const FFISafe<Rav1dPictureDataComponentOffset>,
_top: *const FFISafe<DisjointMut<AlignedVec64<u8>>>,
_top: *const FFISafe<CdefTop<'_>>,
_bottom: *const FFISafe<CdefBottom<'_>>,
) -> ());

pub type CdefBottom<'a> = PicOrBuf<'a, AlignedVec64<u8>>;
pub type CdefTop<'a> = WithOffset<&'a DisjointMut<AlignedVec64<u8>>>;
pub type CdefBottom<'a> = WithOffset<PicOrBuf<'a, AlignedVec64<u8>>>;

impl cdef::Fn {
/// CDEF operates entirely on pre-filter data.
Expand All @@ -70,10 +71,8 @@ impl cdef::Fn {
&self,
dst: Rav1dPictureDataComponentOffset,
left: &[LeftPixelRow2px<BD::Pixel>; 8],
top: &DisjointMut<AlignedVec64<u8>>,
top_off: usize,
top: CdefTop<'_>,
bottom: CdefBottom<'_>,
bottom_off: usize,
pri_strength: c_int,
sec_strength: u8,
dir: c_int,
Expand All @@ -84,12 +83,9 @@ impl cdef::Fn {
let dst_ptr = dst.as_mut_ptr::<BD>().cast();
let stride = dst.stride();
let left = ptr::from_ref(left).cast();
let top_ptr = (&*top.element_as(top_off) as *const BD::Pixel).cast();
let bottom_ptr = match bottom {
PicOrBuf::Pic(pic) => pic.as_ptr_at::<BD>(bottom_off).cast(),
PicOrBuf::Buf(buf) => (&*buf.element_as(bottom_off) as *const BD::Pixel).cast(),
};
let top = FFISafe::new(top);
let top_ptr = top.as_ptr::<BD>().cast();
let bottom_ptr = bottom.as_ptr::<BD>().cast();
let top = FFISafe::new(&top);
let bottom = FFISafe::new(&bottom);
let sec_strength = sec_strength as c_int;
let damping = damping as c_int;
Expand Down Expand Up @@ -172,16 +168,14 @@ fn padding<BD: BitDepth>(
tmp: &mut [i16; TMP_STRIDE * TMP_STRIDE],
src: Rav1dPictureDataComponentOffset,
left: &[LeftPixelRow2px<BD::Pixel>; 8],
top: &DisjointMut<AlignedVec64<u8>>,
top_off: usize,
top: CdefTop<'_>,
bottom: CdefBottom<'_>,
bottom_off: usize,
w: usize,
h: usize,
edges: CdefEdgeFlags,
) {
let top_off = top_off - 2;
let bottom_off = bottom_off - 2;
let top = top - 2_usize;
let bottom = bottom - 2_usize;
let stride = src.pixel_stride::<BD>();

// Fill extended input buffer.
Expand All @@ -207,8 +201,8 @@ fn padding<BD: BitDepth>(
}

for (i, y) in (y_start..2).enumerate() {
let offset = top_off.wrapping_add_signed(i as isize * stride);
let top = top.slice_as::<_, BD::Pixel>((offset.., ..x_end));
let top = top + i as isize * stride;
let top = top.data.slice_as::<_, BD::Pixel>((top.offset.., ..x_end));
for x in x_start..x_end {
tmp[x + y * TMP_STRIDE] = top[x].as_::<i16>();
}
Expand All @@ -228,12 +222,12 @@ fn padding<BD: BitDepth>(
}
for (i, y) in (h + 2..y_end).enumerate() {
let tmp = &mut tmp[y * TMP_STRIDE..];
let bottom_off = bottom_off.wrapping_add_signed(i as isize * stride);
let bottom = bottom + i as isize * stride;
// This is a fallback `fn`, so perf is not as important here, so an extra branch
// here should be okay.
let bottom = match bottom {
PicOrBuf::Pic(pic) => &*pic.slice::<BD, _>((bottom_off.., ..x_end)),
PicOrBuf::Buf(buf) => &*buf.slice_as((bottom_off.., ..x_end)),
let bottom = match bottom.data {
PicOrBuf::Pic(pic) => &*pic.slice::<BD, _>((bottom.offset.., ..x_end)),
PicOrBuf::Buf(buf) => &*buf.slice_as((bottom.offset.., ..x_end)),
};
for x in x_start..x_end {
tmp[x] = bottom[x].as_::<i16>();
Expand All @@ -245,10 +239,8 @@ fn padding<BD: BitDepth>(
fn cdef_filter_block_rust<BD: BitDepth>(
dst: Rav1dPictureDataComponentOffset,
left: &[LeftPixelRow2px<BD::Pixel>; 8],
top: &DisjointMut<AlignedVec64<u8>>,
top_off: usize,
top: CdefTop<'_>,
bottom: CdefBottom<'_>,
bottom_off: usize,
pri_strength: c_int,
sec_strength: c_int,
dir: c_int,
Expand All @@ -263,9 +255,7 @@ fn cdef_filter_block_rust<BD: BitDepth>(
assert!((w == 4 || w == 8) && (h == 4 || h == 8));
let mut tmp = [0; TMP_STRIDE * TMP_STRIDE]; // `12 * 12` is the maximum value of `TMP_STRIDE * (h + 4)`.

padding::<BD>(
&mut tmp, dst, left, top, top_off, bottom, bottom_off, w, h, edges,
);
padding::<BD>(&mut tmp, dst, left, top, bottom, w, h, edges);

let tmp = tmp;
let tmp_offset = 2 * TMP_STRIDE + 2;
Expand Down Expand Up @@ -385,42 +375,32 @@ unsafe extern "C" fn cdef_filter_block_c_erased<BD: BitDepth, const W: usize, co
_dst_ptr: *mut DynPixel,
_stride: ptrdiff_t,
left: *const [LeftPixelRow2px<DynPixel>; 8],
top_ptr: *const DynPixel,
bottom_ptr: *const DynPixel,
_top_ptr: *const DynPixel,
_bottom_ptr: *const DynPixel,
pri_strength: c_int,
sec_strength: c_int,
dir: c_int,
damping: c_int,
edges: CdefEdgeFlags,
bitdepth_max: c_int,
dst: *const FFISafe<Rav1dPictureDataComponentOffset>,
top: *const FFISafe<DisjointMut<AlignedVec64<u8>>>,
top: *const FFISafe<CdefTop<'_>>,
bottom: *const FFISafe<CdefBottom<'_>>,
) {
// SAFETY: Was passed as `FFISafe::new(_)` in `cdef::Fn::call`.
let dst = *unsafe { FFISafe::get(dst) };
// SAFETY: Reverse of cast in `cdef::Fn::call`.
let left = unsafe { &*left.cast() };
// SAFETY: Was passed as `FFISafe::new(_)` in `cdef::Fn::call`.
let top = unsafe { FFISafe::get(top) };
let top_base = top.as_mut_ptr().cast::<BD::Pixel>().cast_const();
// SAFETY: Reverse of what was done in `cdef::Fn::call`. `top_ptr` is
// derived from `top` and so is safe to calculate the offset from.
let top_off = unsafe { top_ptr.cast::<BD::Pixel>().offset_from(top_base) } as usize;
let top = *unsafe { FFISafe::get(top) };
// SAFETY: Was passed as `FFISafe::new(_)` in `cdef::Fn::call`.
let bottom = *unsafe { FFISafe::get(bottom) };
let bottom_base = bottom.as_ptr::<BD>();
// SAFETY: Reverse of what was done in `cdef::Fn::call`. `bottom_ptr` is
// derived from `bottom` and so is safe to calculate the offset from.
let bottom_off = unsafe { bottom_ptr.cast::<BD::Pixel>().offset_from(bottom_base) } as usize;
let bd = BD::from_c(bitdepth_max);
cdef_filter_block_rust(
dst,
left,
top,
top_off,
bottom,
bottom_off,
pri_strength,
sec_strength,
dir,
Expand Down Expand Up @@ -571,7 +551,7 @@ unsafe extern "C" fn cdef_filter_neon_erased<
edges: CdefEdgeFlags,
bitdepth_max: c_int,
_dst: *const FFISafe<Rav1dPictureDataComponentOffset>,
_top: *const FFISafe<DisjointMut<AlignedVec64<u8>>>,
_top: *const FFISafe<CdefTop<'_>>,
_bottom: *const FFISafe<CdefBottom<'_>>,
) {
use crate::src::align::Align16;
Expand Down
121 changes: 61 additions & 60 deletions src/cdef_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use crate::src::internal::Rav1dTaskContext;
use crate::src::pic_or_buf::PicOrBuf;
use crate::src::strided::Strided as _;
use crate::src::strided::WithStride;
use crate::src::with_offset::WithOffset;
use bitflags::bitflags;
use libc::ptrdiff_t;
use std::cmp;
Expand Down Expand Up @@ -269,30 +270,30 @@ pub(crate) fn rav1d_cdef_brow<BD: BitDepth>(
None
} else if sbrow_start && by == by_start {
let top = if resize {
(
&f.lf.cdef_line_buf,
f.lf.cdef_lpf_line[0].wrapping_add_signed(
WithOffset {
data: &f.lf.cdef_line_buf,
offset: f.lf.cdef_lpf_line[0].wrapping_add_signed(
((sby - 1) * 4) as isize * y_stride + (bx * 4) as isize,
),
)
}
} else {
(
&f.lf.lr_line_buf,
f.lf.lr_lpf_line[0].wrapping_add_signed(
WithOffset {
data: &f.lf.lr_line_buf,
offset: f.lf.lr_lpf_line[0].wrapping_add_signed(
(sby * (4 << sb128) - 4) as isize * y_stride
+ (bx * 4) as isize,
),
)
}
};
let bottom = bptrs[0] + (8 * y_stride);
Some((top, PicOrBuf::Pic(bottom.data), bottom.offset))
Some((top, WithOffset::pic(bottom)))
} else if !sbrow_start && by + 2 >= by_end {
let top = (
&f.lf.cdef_line_buf,
f.lf.cdef_line[tf as usize][0].wrapping_add_signed(
let top = WithOffset {
data: &f.lf.cdef_line_buf,
offset: f.lf.cdef_line[tf as usize][0].wrapping_add_signed(
(sby * 4) as isize * y_stride + (bx * 4) as isize,
),
);
};
let (buf, offset) = if resize {
(
&f.lf.cdef_line_buf,
Expand All @@ -311,26 +312,28 @@ pub(crate) fn rav1d_cdef_brow<BD: BitDepth>(
};
Some((
top,
PicOrBuf::Buf(WithStride {
buf,
stride: y_stride,
}),
offset,
WithOffset {
data: PicOrBuf::Buf(WithStride {
buf,
stride: y_stride,
}),
offset,
},
))
} else {
None
};

let ((top, top_off), bot, bot_off) = top_bot.unwrap_or_else(|| {
let top = (
&f.lf.cdef_line_buf,
f.lf.cdef_line[tf as usize][0].wrapping_add_signed(
let (top, bot) = top_bot.unwrap_or_else(|| {
let top = WithOffset {
data: &f.lf.cdef_line_buf,
offset: f.lf.cdef_line[tf as usize][0].wrapping_add_signed(
have_tt as isize * (sby * 4) as isize * y_stride
+ (bx * 4) as isize,
),
);
};
let bottom = bptrs[0] + (8 * y_stride);
(top, PicOrBuf::Pic(bottom.data), bottom.offset)
(top, WithOffset::pic(bottom))
});

if y_pri_lvl != 0 {
Expand All @@ -340,9 +343,7 @@ pub(crate) fn rav1d_cdef_brow<BD: BitDepth>(
bptrs[0],
&lr_bak[bit as usize][0],
top,
top_off,
bot,
bot_off,
adj_y_pri_lvl,
y_sec_lvl,
dir,
Expand All @@ -356,9 +357,7 @@ pub(crate) fn rav1d_cdef_brow<BD: BitDepth>(
bptrs[0],
&lr_bak[bit as usize][0],
top,
top_off,
bot,
bot_off,
0,
y_sec_lvl,
0,
Expand All @@ -381,33 +380,34 @@ pub(crate) fn rav1d_cdef_brow<BD: BitDepth>(
None
} else if sbrow_start && by == by_start {
let top = if resize {
(
&f.lf.cdef_line_buf,
f.lf.cdef_lpf_line[pl].wrapping_add_signed(
WithOffset {
data: &f.lf.cdef_line_buf,
offset: f.lf.cdef_lpf_line[pl].wrapping_add_signed(
((sby - 1) * 4) as isize * uv_stride
+ (bx * 4 >> ss_hor) as isize,
),
)
}
} else {
let line = sby * (4 << sb128) - 4;
(
&f.lf.lr_line_buf,
f.lf.lr_lpf_line[pl].wrapping_add_signed(
WithOffset {
data: &f.lf.lr_line_buf,
offset: f.lf.lr_lpf_line[pl].wrapping_add_signed(
line as isize * uv_stride
+ (bx * 4 >> ss_hor) as isize,
),
)
}
};
let bottom = bptrs[pl] + ((8 >> ss_ver) * uv_stride);
Some((top, PicOrBuf::Pic(bottom.data), bottom.offset))
Some((top, WithOffset::pic(bottom)))
} else if !sbrow_start && by + 2 >= by_end {
let top = (
&f.lf.cdef_line_buf,
f.lf.cdef_line[tf as usize][pl].wrapping_add_signed(
(sby * 8) as isize * uv_stride
+ (bx * 4 >> ss_hor) as isize,
),
);
let top = WithOffset {
data: &f.lf.cdef_line_buf,
offset: f.lf.cdef_line[tf as usize][pl]
.wrapping_add_signed(
(sby * 8) as isize * uv_stride
+ (bx * 4 >> ss_hor) as isize,
),
};
let (buf, offset) = if resize {
(
&f.lf.cdef_line_buf,
Expand All @@ -428,35 +428,36 @@ pub(crate) fn rav1d_cdef_brow<BD: BitDepth>(
};
Some((
top,
PicOrBuf::Buf(WithStride {
buf,
stride: uv_stride,
}),
offset,
WithOffset {
data: PicOrBuf::Buf(WithStride {
buf,
stride: uv_stride,
}),
offset,
},
))
} else {
None
};

let ((top, top_off), bot, bot_off) = top_bot.unwrap_or_else(|| {
let top = (
&f.lf.cdef_line_buf,
f.lf.cdef_line[tf as usize][pl].wrapping_add_signed(
have_tt as isize * (sby * 8) as isize * uv_stride
+ (bx * 4 >> ss_hor) as isize,
),
);
let (top, bot) = top_bot.unwrap_or_else(|| {
let top = WithOffset {
data: &f.lf.cdef_line_buf,
offset: f.lf.cdef_line[tf as usize][pl]
.wrapping_add_signed(
have_tt as isize * (sby * 8) as isize * uv_stride
+ (bx * 4 >> ss_hor) as isize,
),
};
let bottom = bptrs[pl] + ((8 >> ss_ver) * uv_stride);
(top, PicOrBuf::Pic(bottom.data), bottom.offset)
(top, WithOffset::pic(bottom))
});

f.dsp.cdef.fb[uv_idx as usize].call::<BD>(
bptrs[pl],
&lr_bak[bit as usize][pl],
top,
top_off,
bot,
bot_off,
uv_pri_lvl.into(),
uv_sec_lvl,
uvdir,
Expand Down

0 comments on commit 57ef1c1

Please sign in to comment.