Skip to content

Commit

Permalink
Rav1dFrameContext_lf::cdef_line: Convert pointers to offsets
Browse files Browse the repository at this point in the history
  • Loading branch information
randomPoison committed Mar 1, 2024
1 parent 3fba038 commit 8a433b4
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 52 deletions.
35 changes: 35 additions & 0 deletions include/common/bitdepth.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use std::ops::Div;
use std::ops::Mul;
use std::ops::Rem;
use std::ops::Shr;
use std::slice;
use to_method::To as _;

pub trait FromPrimitive<T> {
Expand Down Expand Up @@ -96,6 +97,16 @@ impl BPC {
Self::BPC16
}
}

pub fn pxstride<T>(self, n: T) -> T
where
T: Copy + From<u8> + Div<Output = T>,
{
match self {
BPC::BPC8 => n,
BPC::BPC16 => n / 2.into(),
}
}
}

pub trait BitDepth: Clone + Copy {
Expand Down Expand Up @@ -193,6 +204,30 @@ pub trait BitDepth: Clone + Copy {

fn get_intermediate_bits(&self) -> u8;

/// Reinterprets a byte slice as a slice of `Self::Pixel`.
///
/// The returned slice covers exactly the same memory as `bytes`.
///
/// # Panics
///
/// Panics if `bytes.len()` is not a multiple of the size of `Self::Pixel`,
/// or if `bytes` is not aligned for `Self::Pixel`.
fn cast_pixel_slice(bytes: &[u8]) -> &[Self::Pixel] {
    let px_size = mem::size_of::<Self::Pixel>();

    // Validate against the pixel size rather than the resulting element
    // count: the count is 0 for an empty buffer (and `x % 0` panics), and it
    // does not catch odd lengths such as 3 bytes of 2-byte pixels.
    assert!(bytes.len() % px_size == 0);
    assert!(bytes.as_ptr() as usize % mem::align_of::<Self::Pixel>() == 0);

    let len = bytes.len() / px_size;

    // SAFETY: The pointer is aligned for `Self::Pixel` (checked above) and
    // `len * px_size == bytes.len()`, so the new slice spans exactly the
    // bytes borrowed by `bytes` for the same lifetime.
    // NOTE(review): this also assumes every bit pattern is a valid
    // `Self::Pixel` (true for plain integer pixels) - confirm against the
    // bounds on `Pixel`.
    unsafe { slice::from_raw_parts(bytes.as_ptr().cast(), len) }
}

/// Reinterprets a mutable byte slice as a mutable slice of `Self::Pixel`.
///
/// The returned slice covers exactly the same memory as `bytes`.
///
/// # Panics
///
/// Panics if `bytes.len()` is not a multiple of the size of `Self::Pixel`,
/// or if `bytes` is not aligned for `Self::Pixel`.
fn cast_pixel_slice_mut(bytes: &mut [u8]) -> &mut [Self::Pixel] {
    let px_size = mem::size_of::<Self::Pixel>();

    // Validate against the pixel size rather than the resulting element
    // count: the count is 0 for an empty buffer (and `x % 0` panics), and it
    // does not catch odd lengths such as 3 bytes of 2-byte pixels.
    assert!(bytes.len() % px_size == 0);
    assert!(bytes.as_ptr() as usize % mem::align_of::<Self::Pixel>() == 0);

    let len = bytes.len() / px_size;

    // SAFETY: The pointer is aligned for `Self::Pixel` (checked above) and
    // `len * px_size == bytes.len()`, so the new slice spans exactly the
    // bytes exclusively borrowed by `bytes` for the same lifetime.
    // NOTE(review): this also assumes every bit pattern is a valid
    // `Self::Pixel` (true for plain integer pixels) - confirm against the
    // bounds on `Pixel`.
    unsafe { slice::from_raw_parts_mut(bytes.as_mut_ptr().cast(), len) }
}

const PREP_BIAS: i16;

unsafe fn select<T>(bd: &BitDepthUnion<T>) -> &T::T<Self>
Expand Down
70 changes: 39 additions & 31 deletions src/cdef_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ impl Backup2x8Flags {
}

unsafe fn backup2lines<BD: BitDepth>(
dst: &[*mut BD::Pixel; 3],
dst_buf: &mut [BD::Pixel],
dst_off: [usize; 3],
src: &[*mut BD::Pixel; 3],
stride: &[ptrdiff_t; 2],
layout: Rav1dPixelLayout,
Expand All @@ -42,13 +43,13 @@ unsafe fn backup2lines<BD: BitDepth>(
let len = 2 * y_stride.unsigned_abs();
if y_stride < 0 {
BD::pixel_copy(
slice::from_raw_parts_mut(dst[0].offset(y_stride), len),
&mut dst_buf[dst_off[0].wrapping_add_signed(y_stride)..][..len],
slice::from_raw_parts(src[0].offset(7 * y_stride), len),
len,
);
} else {
BD::pixel_copy(
slice::from_raw_parts_mut(dst[0], len),
&mut dst_buf[dst_off[0]..][..len],
slice::from_raw_parts(src[0].offset(6 * y_stride), len),
len,
);
Expand All @@ -65,12 +66,12 @@ unsafe fn backup2lines<BD: BitDepth>(
};

BD::pixel_copy(
slice::from_raw_parts_mut(dst[1].offset(uv_stride), len),
&mut dst_buf[dst_off[1].wrapping_add_signed(uv_stride)..][..len],
slice::from_raw_parts(src[1].offset(uv_off * uv_stride), len),
len,
);
BD::pixel_copy(
slice::from_raw_parts_mut(dst[2].offset(uv_stride), len),
&mut dst_buf[dst_off[2].wrapping_add_signed(uv_stride)..][..len],
slice::from_raw_parts(src[2].offset(uv_off * uv_stride), len),
len,
);
Expand All @@ -82,12 +83,12 @@ unsafe fn backup2lines<BD: BitDepth>(
};

BD::pixel_copy(
slice::from_raw_parts_mut(dst[1], len),
&mut dst_buf[dst_off[1]..][..len],
slice::from_raw_parts(src[1].offset(uv_off * uv_stride), len),
len,
);
BD::pixel_copy(
slice::from_raw_parts_mut(dst[2], len),
&mut dst_buf[dst_off[2]..][..len],
slice::from_raw_parts(src[2].offset(uv_off * uv_stride), len),
len,
);
Expand Down Expand Up @@ -157,7 +158,7 @@ fn adjust_strength(strength: c_int, var: c_uint) -> c_int {
pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
c: &Rav1dContext,
tc: &mut Rav1dTaskContext,
f: &Rav1dFrameData,
f: &mut Rav1dFrameData,
p: &[*mut BD::Pixel; 3],
lflvl_offset: i32,
by_start: c_int,
Expand Down Expand Up @@ -194,6 +195,8 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
let y_stride: ptrdiff_t = BD::pxstride(f.cur.stride[0]);
let uv_stride: ptrdiff_t = BD::pxstride(f.cur.stride[1]);

let cdef_line_buf = BD::cast_pixel_slice_mut(&mut f.lf.cdef_line_buf);

let mut bit = false;
for by in (by_start..by_end).step_by(2) {
let tf = tc.top_pre_cdef_toggle != 0;
Expand All @@ -206,15 +209,15 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
&& edges.contains(CdefEdgeFlags::HAVE_BOTTOM)
{
// backup pre-filter data for next iteration
let cdef_top_bak: [*mut BD::Pixel; 3] = [
(f.lf.cdef_line[!tf as usize][0] as *mut BD::Pixel)
.offset(have_tt as isize * sby as isize * 4 * y_stride),
(f.lf.cdef_line[!tf as usize][1] as *mut BD::Pixel)
.offset(have_tt as isize * sby as isize * 8 * uv_stride),
(f.lf.cdef_line[!tf as usize][2] as *mut BD::Pixel)
.offset(have_tt as isize * sby as isize * 8 * uv_stride),
let cdef_top_bak = [
f.lf.cdef_line[!tf as usize][0]
.wrapping_add_signed(have_tt as isize * sby as isize * 4 * y_stride),
f.lf.cdef_line[!tf as usize][1]
.wrapping_add_signed(have_tt as isize * sby as isize * 8 * uv_stride),
f.lf.cdef_line[!tf as usize][2]
.wrapping_add_signed(have_tt as isize * sby as isize * 8 * uv_stride),
];
backup2lines::<BD>(&cdef_top_bak, &ptrs, &f.cur.stride, layout);
backup2lines::<BD>(cdef_line_buf, cdef_top_bak, &ptrs, &f.cur.stride, layout);
}

let mut lr_bak =
Expand Down Expand Up @@ -327,9 +330,11 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
bot = bptrs[0].offset(8 * y_stride as isize);
st_y = false;
} else if !sbrow_start && by + 2 >= by_end {
top = f.lf.cdef_line[tf as usize][0]
.cast::<BD::Pixel>()
.offset((sby * 4) as isize * y_stride + (bx * 4) as isize);
offset = (sby * 4) as isize * y_stride + (bx * 4) as isize;
top = cdef_line_buf
.as_mut_ptr()
.add(f.lf.cdef_line[tf as usize][0])
.offset(offset);
if resize {
offset = (sby * 4 + 2) as isize * y_stride + (bx * 4) as isize;
bot = f.lf.cdef_lpf_line[0].cast::<BD::Pixel>().offset(offset);
Expand All @@ -344,10 +349,12 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
}

if st_y {
offset = (sby * 4) as isize * y_stride;
top = f.lf.cdef_line[tf as usize][0]
.cast::<BD::Pixel>()
.offset(have_tt as isize * offset + (bx * 4) as isize);
offset = have_tt as isize * (sby * 4) as isize * y_stride
+ (bx * 4) as isize;
top = cdef_line_buf
.as_mut_ptr()
.add(f.lf.cdef_line[tf as usize][0])
.offset(offset);
bot = bptrs[0].offset(8 * y_stride as isize);
}

Expand Down Expand Up @@ -415,8 +422,9 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
} else if !sbrow_start && by + 2 >= by_end {
let top_offset: ptrdiff_t = (sby * 8) as isize * uv_stride
+ (bx * 4 >> ss_hor) as isize;
top = f.lf.cdef_line[tf as usize][pl]
.cast::<BD::Pixel>()
top = cdef_line_buf
.as_mut_ptr()
.add(f.lf.cdef_line[tf as usize][pl])
.offset(top_offset);
if resize {
offset = (sby * 4 + 2) as isize * uv_stride
Expand All @@ -437,12 +445,12 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
}

if st_uv {
let offset_0 = (sby * 8) as isize * uv_stride;
top =
f.lf.cdef_line[tf as usize][pl].cast::<BD::Pixel>().offset(
have_tt as isize * offset_0
+ (bx * 4 >> ss_hor) as isize,
);
let offset = have_tt as isize * (sby * 8) as isize * uv_stride
+ (bx * 4 >> ss_hor) as isize;
top = cdef_line_buf
.as_mut_ptr()
.add(f.lf.cdef_line[tf as usize][pl])
.offset(offset);
bot = bptrs[pl].offset((8 >> ss_ver) * uv_stride);
}

Expand Down
45 changes: 30 additions & 15 deletions src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::include::common::bitdepth::BitDepth16;
use crate::include::common::bitdepth::BitDepth8;
use crate::include::common::bitdepth::DynCoef;
use crate::include::common::bitdepth::DynPixel;
use crate::include::common::bitdepth::BPC;
use crate::include::common::intops::apply_sign64;
use crate::include::common::intops::iclip;
use crate::include::common::intops::iclip_u8;
Expand Down Expand Up @@ -4394,29 +4395,43 @@ pub(crate) unsafe fn rav1d_decode_frame_init(
// failure:
// f.lf.cdef_buf_plane_sz = [0, 0];
f.lf.cdef_line_buf.resize(alloc_sz, 0);
let mut ptr = f.lf.cdef_line_buf.as_mut_ptr();

ptr = ptr.offset(32);
let bpc = BPC::from_bitdepth_max(f.bitdepth_max);
let y_stride_px = bpc.pxstride(f.cur.stride[0]);
let uv_stride_px = bpc.pxstride(f.cur.stride[1]);

let mut offset = bpc.pxstride(32usize);
if y_stride < 0 {
f.lf.cdef_line[0][0] = ptr.offset(-(y_stride * (f.sbh as isize * 4 - 1))) as *mut DynPixel;
f.lf.cdef_line[1][0] = ptr.offset(-(y_stride * (f.sbh as isize * 4 - 3))) as *mut DynPixel;
f.lf.cdef_line[0][0] =
offset.wrapping_add_signed(-(y_stride_px * (f.sbh as isize * 4 - 1)));
f.lf.cdef_line[1][0] =
offset.wrapping_add_signed(-(y_stride_px * (f.sbh as isize * 4 - 3)));
} else {
f.lf.cdef_line[0][0] = ptr.offset(y_stride * 0) as *mut DynPixel;
f.lf.cdef_line[1][0] = ptr.offset(y_stride * 2) as *mut DynPixel;
f.lf.cdef_line[0][0] = offset.wrapping_add_signed(y_stride_px * 0);
f.lf.cdef_line[1][0] = offset.wrapping_add_signed(y_stride_px * 2);
}
ptr = ptr.offset(y_stride.abs() * f.sbh as isize * 4);
offset = offset.wrapping_add_signed(y_stride_px.abs() * f.sbh as isize * 4);
if uv_stride < 0 {
f.lf.cdef_line[0][1] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 1))) as *mut DynPixel;
f.lf.cdef_line[0][2] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 3))) as *mut DynPixel;
f.lf.cdef_line[1][1] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 5))) as *mut DynPixel;
f.lf.cdef_line[1][2] = ptr.offset(-(uv_stride * (f.sbh as isize * 8 - 7))) as *mut DynPixel;
f.lf.cdef_line[0][1] =
offset.wrapping_add_signed(-(uv_stride_px * (f.sbh as isize * 8 - 1)));
f.lf.cdef_line[0][2] =
offset.wrapping_add_signed(-(uv_stride_px * (f.sbh as isize * 8 - 3)));
f.lf.cdef_line[1][1] =
offset.wrapping_add_signed(-(uv_stride_px * (f.sbh as isize * 8 - 5)));
f.lf.cdef_line[1][2] =
offset.wrapping_add_signed(-(uv_stride_px * (f.sbh as isize * 8 - 7)));
} else {
f.lf.cdef_line[0][1] = ptr.offset(uv_stride * 0) as *mut DynPixel;
f.lf.cdef_line[0][2] = ptr.offset(uv_stride * 2) as *mut DynPixel;
f.lf.cdef_line[1][1] = ptr.offset(uv_stride * 4) as *mut DynPixel;
f.lf.cdef_line[1][2] = ptr.offset(uv_stride * 6) as *mut DynPixel;
f.lf.cdef_line[0][1] = offset.wrapping_add_signed(uv_stride_px * 0);
f.lf.cdef_line[0][2] = offset.wrapping_add_signed(uv_stride_px * 2);
f.lf.cdef_line[1][1] = offset.wrapping_add_signed(uv_stride_px * 4);
f.lf.cdef_line[1][2] = offset.wrapping_add_signed(uv_stride_px * 6);
}

let mut ptr =
f.lf.cdef_line_buf
.as_mut_ptr()
.add(32)
.offset(y_stride.abs() * f.sbh as isize * 4);
if need_cdef_lpf_copy != 0 {
ptr = ptr.offset(uv_stride.abs() * f.sbh as isize * 8);
if y_stride < 0 {
Expand Down
8 changes: 4 additions & 4 deletions src/internal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ pub(crate) struct Rav1dFrameContext_bd_fn {
pub filter_sbrow_deblock_cols: filter_sbrow_fn,
pub filter_sbrow_deblock_rows: filter_sbrow_fn,
pub filter_sbrow_cdef:
unsafe fn(&Rav1dContext, &Rav1dFrameData, &mut Rav1dTaskContext, c_int) -> (),
unsafe fn(&Rav1dContext, &mut Rav1dFrameData, &mut Rav1dTaskContext, c_int) -> (),
pub filter_sbrow_resize: filter_sbrow_fn,
pub filter_sbrow_lr: filter_sbrow_fn,
pub backup_ipred_edge: backup_ipred_edge_fn,
Expand Down Expand Up @@ -435,9 +435,9 @@ pub struct Rav1dFrameContext_lf {
pub tx_lpf_right_edge: Vec<u8>, /* len = h*2 */
pub cdef_line_buf: AlignedVec32<u8>, /* AlignedVec32<DynPixel> */
pub lr_line_buf: *mut u8,
pub cdef_line: [[*mut DynPixel; 3]; 2], /* [2 pre/post][3 plane] */
pub cdef_lpf_line: [*mut DynPixel; 3], /* plane */
pub lr_lpf_line: [*mut DynPixel; 3], /* plane */
pub cdef_line: [[usize; 3]; 2], /* [2 pre/post][3 plane] */
pub cdef_lpf_line: [*mut DynPixel; 3], /* plane */
pub lr_lpf_line: [*mut DynPixel; 3], /* plane */

// in-loop filter per-frame state keeping
pub start_of_tile_row: *mut u8,
Expand Down
4 changes: 2 additions & 2 deletions src/recon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4568,7 +4568,7 @@ pub(crate) unsafe fn rav1d_filter_sbrow_deblock_rows<BD: BitDepth>(

pub(crate) unsafe fn rav1d_filter_sbrow_cdef<BD: BitDepth>(
c: &Rav1dContext,
f: &Rav1dFrameData,
f: &mut Rav1dFrameData,
tc: &mut Rav1dTaskContext,
sby: c_int,
) {
Expand Down Expand Up @@ -4721,10 +4721,10 @@ pub(crate) unsafe fn rav1d_filter_sbrow<BD: BitDepth>(
rav1d_filter_sbrow_deblock_cols::<BD>(c, f, t, sby);
rav1d_filter_sbrow_deblock_rows::<BD>(c, f, t, sby);
let seq_hdr = &***f.seq_hdr.as_ref().unwrap();
let frame_hdr = &***f.frame_hdr.as_ref().unwrap();
if seq_hdr.cdef != 0 {
rav1d_filter_sbrow_cdef::<BD>(c, f, t, sby);
}
let frame_hdr = &***f.frame_hdr.as_ref().unwrap();
if frame_hdr.size.width[0] != frame_hdr.size.width[1] {
rav1d_filter_sbrow_resize::<BD>(c, f, t, sby);
}
Expand Down

0 comments on commit 8a433b4

Please sign in to comment.