Skip to content

Commit

Permalink
Rav1dFrameContext_lf::level: make a Box<[_]> (#726)
Browse files Browse the repository at this point in the history
There is one tricky bit of logic here.

The `filter_plane_rows_y` and `filter_plane_rows_uv` functions are
called in a loop that advances `level_ptr` by some chunk size on each
iteration, but the helper functions use more elements than that chunk size.

The functions asserted an actual length, but the comments there indicate
(to me) that the assertion is not really needed, and that just passing in
the whole remaining slice should be fine.
  • Loading branch information
kkysen authored Feb 4, 2024
2 parents 3730ada + 70807e9 commit 223b536
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 98 deletions.
13 changes: 6 additions & 7 deletions src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1452,7 +1452,7 @@ unsafe fn decode_b(
}

let ts = &mut *t.ts;
let f = &*t.f;
let f = &mut *t.f;
let frame_hdr = &***f.frame_hdr.as_ref().unwrap();
let mut b_mem = Default::default();
let b = if t.frame_thread.pass != 0 {
Expand Down Expand Up @@ -2161,7 +2161,7 @@ unsafe fn decode_b(
if frame_hdr.loopfilter.level_y != [0, 0] {
rav1d_create_lf_mask_intra(
&mut *t.lf_mask,
f.lf.level,
&mut f.lf.level,
f.b4_stride,
&*ts.lflvl.offset(b.seg_id as isize),
t.bx,
Expand Down Expand Up @@ -3151,7 +3151,7 @@ unsafe fn decode_b(
}
rav1d_create_lf_mask_inter(
&mut *t.lf_mask,
f.lf.level,
&mut f.lf.level,
f.b4_stride,
// In C, the inner dimensions (`ref`, `is_gmv`) are offset,
// but then cast back to a pointer to the full array,
Expand Down Expand Up @@ -4515,14 +4515,13 @@ pub(crate) unsafe fn rav1d_decode_frame_init(
// update allocation for loopfilter masks
if num_sb128 != f.lf.mask_sz {
freep(&mut f.lf.mask as *mut *mut Av1Filter as *mut c_void);
freep(&mut f.lf.level as *mut *mut [u8; 4] as *mut c_void);
let _ = mem::take(&mut f.lf.level);
f.lf.mask =
malloc(::core::mem::size_of::<Av1Filter>() * num_sb128 as usize) as *mut Av1Filter;
// over-allocate by 3 bytes since some of the SIMD implementations
// index this from the level type and can thus over-read by up to 3
f.lf.level = malloc(::core::mem::size_of::<[u8; 4]>() * num_sb128 as usize * 32 * 32 + 3)
as *mut [u8; 4];
if f.lf.mask.is_null() || f.lf.level.is_null() {
f.lf.level = vec![[0u8; 4]; num_sb128 as usize * 32 * 32 + 3].into(); // TODO fallible allocation
if f.lf.mask.is_null() {
f.lf.mask_sz = 0;
return Err(ENOMEM);
}
Expand Down
4 changes: 2 additions & 2 deletions src/internal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ pub struct Rav1dFrameContext_frame_thread {
/// loopfilter
#[repr(C)]
pub struct Rav1dFrameContext_lf {
pub level: *mut [u8; 4],
pub level: Box<[[u8; 4]]>,
pub mask: *mut Av1Filter,
pub lr_mask: *mut Av1Restoration,
pub mask_sz: c_int, /* w*h */
Expand Down Expand Up @@ -665,7 +665,7 @@ pub(crate) struct Rav1dTaskContext_task_thread {

#[repr(C)]
pub(crate) struct Rav1dTaskContext {
pub f: *const Rav1dFrameContext,
pub f: *mut Rav1dFrameContext,
pub ts: *mut Rav1dTileState,
pub bx: c_int,
pub by: c_int,
Expand Down
112 changes: 46 additions & 66 deletions src/lf_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
unsafe fn filter_plane_cols_y<BD: BitDepth>(
f: *const Rav1dFrameContext,
have_left: c_int,
lvl: *const [u8; 4],
lvl: &[[u8; 4]],
b4_stride: ptrdiff_t,
mask: *const [[u16; 2]; 3],
dst: *mut BD::Pixel,
Expand All @@ -355,8 +355,7 @@ unsafe fn filter_plane_cols_y<BD: BitDepth>(
endy4: c_int,
) {
let dsp: *const Rav1dDSPContext = (*f).dsp;
let mut x = 0;
while x < w {
for x in 0..w {
if !(have_left == 0 && x == 0) {
let mut hmask: [u32; 4] = [0; 4];
if starty4 == 0 {
Expand All @@ -378,22 +377,21 @@ unsafe fn filter_plane_cols_y<BD: BitDepth>(
dst.offset((x * 4) as isize).cast(),
ls,
hmask.as_mut_ptr(),
&*(*lvl.offset(x as isize)).as_ptr().offset(0) as *const u8 as *const [u8; 4],
lvl[x as usize][0..].as_ptr() as *const [u8; 4],
b4_stride,
&(*f).lf.lim_lut.0,
endy4 - starty4,
(*f).bitdepth_max,
);
}
x += 1;
}
}

#[inline]
unsafe fn filter_plane_rows_y<BD: BitDepth>(
f: *const Rav1dFrameContext,
have_top: c_int,
mut lvl: *const [u8; 4],
lvl: &[[u8; 4]],
b4_stride: ptrdiff_t,
mask: *const [[u16; 2]; 3],
mut dst: *mut BD::Pixel,
Expand All @@ -403,8 +401,7 @@ unsafe fn filter_plane_rows_y<BD: BitDepth>(
endy4: c_int,
) {
let dsp: *const Rav1dDSPContext = (*f).dsp;
let mut y = starty4;
while y < endy4 {
for (y, lvl) in (starty4..endy4).zip(lvl.chunks(b4_stride as usize)) {
if !(have_top == 0 && y == 0) {
let vmask: [u32; 4] = [
(*mask.offset(y as isize))[0][0] as c_uint
Expand All @@ -419,24 +416,22 @@ unsafe fn filter_plane_rows_y<BD: BitDepth>(
dst.cast(),
ls,
vmask.as_ptr(),
&*(*lvl.offset(0)).as_ptr().offset(1) as *const u8 as *const [u8; 4],
lvl[0][1..].as_ptr() as *const [u8; 4],
b4_stride,
&(*f).lf.lim_lut.0,
w,
(*f).bitdepth_max,
);
}
y += 1;
dst = dst.offset(4 * BD::pxstride(ls as usize) as isize);
lvl = lvl.offset(b4_stride as isize);
}
}

#[inline]
unsafe fn filter_plane_cols_uv<BD: BitDepth>(
f: *const Rav1dFrameContext,
have_left: c_int,
lvl: *const [u8; 4],
lvl: &[[u8; 4]],
b4_stride: ptrdiff_t,
mask: *const [[u16; 2]; 2],
u: *mut BD::Pixel,
Expand All @@ -448,8 +443,7 @@ unsafe fn filter_plane_cols_uv<BD: BitDepth>(
ss_ver: c_int,
) {
let dsp: *const Rav1dDSPContext = (*f).dsp;
let mut x = 0;
while x < w {
for x in 0..w {
if !(have_left == 0 && x == 0) {
let mut hmask: [u32; 3] = [0; 3];
if starty4 == 0 {
Expand All @@ -468,7 +462,7 @@ unsafe fn filter_plane_cols_uv<BD: BitDepth>(
u.offset((x * 4) as isize).cast(),
ls,
hmask.as_mut_ptr(),
&*(*lvl.offset(x as isize)).as_ptr().offset(2) as *const u8 as *const [u8; 4],
lvl[x as usize][2..].as_ptr() as *const [u8; 4],
b4_stride,
&(*f).lf.lim_lut.0,
endy4 - starty4,
Expand All @@ -478,22 +472,21 @@ unsafe fn filter_plane_cols_uv<BD: BitDepth>(
v.offset((x * 4) as isize).cast(),
ls,
hmask.as_mut_ptr(),
&*(*lvl.offset(x as isize)).as_ptr().offset(3) as *const u8 as *const [u8; 4],
lvl[x as usize][3..].as_ptr() as *const [u8; 4],
b4_stride,
&(*f).lf.lim_lut.0,
endy4 - starty4,
(*f).bitdepth_max,
);
}
x += 1;
}
}

#[inline]
unsafe fn filter_plane_rows_uv<BD: BitDepth>(
f: *const Rav1dFrameContext,
have_top: c_int,
mut lvl: *const [u8; 4],
lvl: &[[u8; 4]],
b4_stride: ptrdiff_t,
mask: *const [[u16; 2]; 2],
u: *mut BD::Pixel,
Expand All @@ -506,8 +499,7 @@ unsafe fn filter_plane_rows_uv<BD: BitDepth>(
) {
let dsp: *const Rav1dDSPContext = (*f).dsp;
let mut off_l: ptrdiff_t = 0 as c_int as ptrdiff_t;
let mut y = starty4;
while y < endy4 {
for (y, lvl) in (starty4..endy4).zip(lvl.chunks(b4_stride as usize)) {
if !(have_top == 0 && y == 0) {
let vmask: [u32; 3] = [
(*mask.offset(y as isize))[0][0] as c_uint
Expand All @@ -520,7 +512,7 @@ unsafe fn filter_plane_rows_uv<BD: BitDepth>(
u.offset(off_l as isize).cast(),
ls,
vmask.as_ptr(),
&*(*lvl.offset(0)).as_ptr().offset(2) as *const u8 as *const [u8; 4],
lvl[0][2..].as_ptr() as *const [u8; 4],
b4_stride,
&(*f).lf.lim_lut.0,
w,
Expand All @@ -530,22 +522,20 @@ unsafe fn filter_plane_rows_uv<BD: BitDepth>(
v.offset(off_l as isize).cast(),
ls,
vmask.as_ptr(),
&*(*lvl.offset(0)).as_ptr().offset(3) as *const u8 as *const [u8; 4],
lvl[0][3..].as_ptr() as *const [u8; 4],
b4_stride,
&(*f).lf.lim_lut.0,
w,
(*f).bitdepth_max,
);
}
y += 1;
off_l += 4 * BD::pxstride(ls as usize) as isize;
lvl = lvl.offset(b4_stride as isize);
}
}

pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols<BD: BitDepth>(
f: *const Rav1dFrameContext,
p: *const *mut BD::Pixel,
p: &[*mut BD::Pixel; 3],
lflvl: *mut Av1Filter,
sby: c_int,
start_of_tile_row: c_int,
Expand Down Expand Up @@ -697,16 +687,14 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols<BD: BitDepth>(
}
}
let mut ptr: *mut BD::Pixel;
let mut level_ptr: *mut [u8; 4] =
((*f).lf.level).offset((*f).b4_stride * sby as isize * sbsz as isize);
ptr = *p.offset(0);
let level_ptr = &(*f).lf.level[((*f).b4_stride * sby as isize * sbsz as isize) as usize..];
ptr = p[0];
have_left = 0 as c_int;
x = 0 as c_int;
while x < (*f).sb128w {
for (x, level_ptr) in (0..(*f).sb128w).zip(level_ptr.chunks(32)) {
filter_plane_cols_y::<BD>(
f,
have_left,
level_ptr as *const [u8; 4],
level_ptr,
(*f).b4_stride,
((*lflvl.offset(x as isize)).filter_y[0]).as_mut_ptr() as *const [[u16; 2]; 3],
ptr,
Expand All @@ -715,106 +703,98 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols<BD: BitDepth>(
starty4,
endy4 as c_int,
);
x += 1;
have_left = 1 as c_int;
ptr = ptr.offset(128);
level_ptr = level_ptr.offset(32);
}
if frame_hdr.loopfilter.level_u == 0 && frame_hdr.loopfilter.level_v == 0 {
return;
}
let mut uv_off: ptrdiff_t;
level_ptr = ((*f).lf.level).offset((*f).b4_stride * (sby * sbsz >> ss_ver) as isize);
uv_off = 0 as c_int as ptrdiff_t;
let level_ptr = &(*f).lf.level[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..];
have_left = 0 as c_int;
x = 0 as c_int;
while x < (*f).sb128w {
uv_off = 0;
for (x, level_ptr) in (0..(*f).sb128w).zip(level_ptr.chunks(32 >> ss_hor)) {
filter_plane_cols_uv::<BD>(
f,
have_left,
level_ptr as *const [u8; 4],
level_ptr,
(*f).b4_stride,
((*lflvl.offset(x as isize)).filter_uv[0]).as_mut_ptr() as *const [[u16; 2]; 2],
&mut *(*p.offset(1)).offset(uv_off as isize),
&mut *(*p.offset(2)).offset(uv_off as isize),
&mut *p[1].offset(uv_off as isize),
&mut *p[2].offset(uv_off as isize),
(*f).cur.stride[1],
cmp::min(32 as c_int, (*f).w4 - x * 32) + ss_hor >> ss_hor,
starty4 >> ss_ver,
uv_endy4 as c_int,
ss_ver,
);
x += 1;
have_left = 1 as c_int;
uv_off += 128 >> ss_hor;
level_ptr = level_ptr.offset((32 >> ss_hor) as isize);
}
}

pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows<BD: BitDepth>(
f: *const Rav1dFrameContext,
p: *const *mut BD::Pixel,
p: &[*mut BD::Pixel; 3],
lflvl: *mut Av1Filter,
sby: c_int,
) {
let mut x;
// Don't filter outside the frame
let have_top = (sby > 0) as c_int;
let seq_hdr = &***(*f).seq_hdr.as_ref().unwrap();
let is_sb64 = (seq_hdr.sb128 == 0) as c_int;
let starty4 = (sby & is_sb64) << 4;
let sbsz = 32 >> is_sb64;
let ss_ver =
((*f).cur.p.layout as c_uint == Rav1dPixelLayout::I420 as c_int as c_uint) as c_int;
let ss_hor =
((*f).cur.p.layout as c_uint != Rav1dPixelLayout::I444 as c_int as c_uint) as c_int;
let ss_ver = ((*f).cur.p.layout == Rav1dPixelLayout::I420) as c_int;
let ss_hor = ((*f).cur.p.layout != Rav1dPixelLayout::I444) as c_int;
let endy4: c_uint = (starty4 + cmp::min((*f).h4 - sby * sbsz, sbsz)) as c_uint;
let uv_endy4: c_uint = endy4.wrapping_add(ss_ver as c_uint) >> ss_ver;

let mut ptr: *mut BD::Pixel;
let mut level_ptr: *mut [u8; 4] =
((*f).lf.level).offset((*f).b4_stride * sby as isize * sbsz as isize);
ptr = *p.offset(0);
x = 0 as c_int;
while x < (*f).sb128w {
let mut level_ptr = &(*f).lf.level[((*f).b4_stride * sby as isize * sbsz as isize) as usize..];
ptr = p[0];
for x in 0..(*f).sb128w {
filter_plane_rows_y::<BD>(
f,
have_top,
level_ptr as *const [u8; 4],
level_ptr,
(*f).b4_stride,
((*lflvl.offset(x as isize)).filter_y[1]).as_mut_ptr() as *const [[u16; 2]; 3],
ptr,
(*f).cur.stride[0],
cmp::min(32 as c_int, (*f).w4 - x * 32),
cmp::min(32, (*f).w4 - x * 32),
starty4,
endy4 as c_int,
);
x += 1;
ptr = ptr.offset(128);
level_ptr = level_ptr.offset(32);
level_ptr = &level_ptr[32..];
}

let frame_hdr = &***(*f).frame_hdr.as_ref().unwrap();
if frame_hdr.loopfilter.level_u == 0 && frame_hdr.loopfilter.level_v == 0 {
return;
}

let mut uv_off: ptrdiff_t;
level_ptr = ((*f).lf.level).offset((*f).b4_stride * (sby * sbsz >> ss_ver) as isize);
uv_off = 0 as c_int as ptrdiff_t;
x = 0 as c_int;
while x < (*f).sb128w {
let mut level_ptr =
&(*f).lf.level[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..];
uv_off = 0;
for x in 0..(*f).sb128w {
filter_plane_rows_uv::<BD>(
f,
have_top,
level_ptr as *const [u8; 4],
level_ptr,
(*f).b4_stride,
((*lflvl.offset(x as isize)).filter_uv[1]).as_mut_ptr() as *const [[u16; 2]; 2],
&mut *(*p.offset(1)).offset(uv_off as isize),
&mut *(*p.offset(2)).offset(uv_off as isize),
&mut *p[1].offset(uv_off as isize),
&mut *p[2].offset(uv_off as isize),
(*f).cur.stride[1],
cmp::min(32 as c_int, (*f).w4 - x * 32) + ss_hor >> ss_hor,
starty4 >> ss_ver,
uv_endy4 as c_int,
ss_hor,
);
x += 1;
uv_off += 128 >> ss_hor;
level_ptr = level_ptr.offset((32 >> ss_hor) as isize);
level_ptr = &level_ptr[32 >> ss_hor..];
}
}
Loading

0 comments on commit 223b536

Please sign in to comment.