diff --git a/src/decode.rs b/src/decode.rs index eec05ea41..57c4bd0e1 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -1452,7 +1452,7 @@ unsafe fn decode_b( } let ts = &mut *t.ts; - let f = &*t.f; + let f = &mut *t.f; let frame_hdr = &***f.frame_hdr.as_ref().unwrap(); let mut b_mem = Default::default(); let b = if t.frame_thread.pass != 0 { @@ -2161,7 +2161,7 @@ unsafe fn decode_b( if frame_hdr.loopfilter.level_y != [0, 0] { rav1d_create_lf_mask_intra( &mut *t.lf_mask, - f.lf.level, + &mut f.lf.level, f.b4_stride, &*ts.lflvl.offset(b.seg_id as isize), t.bx, @@ -3151,7 +3151,7 @@ unsafe fn decode_b( } rav1d_create_lf_mask_inter( &mut *t.lf_mask, - f.lf.level, + &mut f.lf.level, f.b4_stride, // In C, the inner dimensions (`ref`, `is_gmv`) are offset, // but then cast back to a pointer to the full array, @@ -4515,14 +4515,13 @@ pub(crate) unsafe fn rav1d_decode_frame_init( // update allocation for loopfilter masks if num_sb128 != f.lf.mask_sz { freep(&mut f.lf.mask as *mut *mut Av1Filter as *mut c_void); - freep(&mut f.lf.level as *mut *mut [u8; 4] as *mut c_void); + let _ = mem::take(&mut f.lf.level); f.lf.mask = malloc(::core::mem::size_of::<Av1Filter>() * num_sb128 as usize) as *mut Av1Filter; // over-allocate by 3 bytes since some of the SIMD implementations // index this from the level type and can thus over-read by up to 3 - f.lf.level = malloc(::core::mem::size_of::<[u8; 4]>() * num_sb128 as usize * 32 * 32 + 3) - as *mut [u8; 4]; - if f.lf.mask.is_null() || f.lf.level.is_null() { + f.lf.level = vec![[0u8; 4]; num_sb128 as usize * 32 * 32 + 3].into(); // TODO fallible allocation + if f.lf.mask.is_null() { f.lf.mask_sz = 0; return Err(ENOMEM); } diff --git a/src/internal.rs b/src/internal.rs index 8b739441d..99924a741 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -382,7 +382,7 @@ pub struct Rav1dFrameContext_frame_thread { /// loopfilter #[repr(C)] pub struct Rav1dFrameContext_lf { - pub level: *mut [u8; 4], + pub level: Box<[[u8; 4]]>, pub mask: *mut Av1Filter,
pub lr_mask: *mut Av1Restoration, pub mask_sz: c_int, /* w*h */ @@ -665,7 +665,7 @@ pub(crate) struct Rav1dTaskContext_task_thread { #[repr(C)] pub(crate) struct Rav1dTaskContext { - pub f: *const Rav1dFrameContext, + pub f: *mut Rav1dFrameContext, pub ts: *mut Rav1dTileState, pub bx: c_int, pub by: c_int, diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 43ab33d83..a5624bea8 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -345,7 +345,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( unsafe fn filter_plane_cols_y( f: *const Rav1dFrameContext, have_left: c_int, - lvl: *const [u8; 4], + lvl: &[[u8; 4]], b4_stride: ptrdiff_t, mask: *const [[u16; 2]; 3], dst: *mut BD::Pixel, @@ -355,8 +355,7 @@ unsafe fn filter_plane_cols_y( endy4: c_int, ) { let dsp: *const Rav1dDSPContext = (*f).dsp; - let mut x = 0; - while x < w { + for x in 0..w { if !(have_left == 0 && x == 0) { let mut hmask: [u32; 4] = [0; 4]; if starty4 == 0 { @@ -378,14 +377,13 @@ unsafe fn filter_plane_cols_y( dst.offset((x * 4) as isize).cast(), ls, hmask.as_mut_ptr(), - &*(*lvl.offset(x as isize)).as_ptr().offset(0) as *const u8 as *const [u8; 4], + lvl[x as usize][0..].as_ptr() as *const [u8; 4], b4_stride, &(*f).lf.lim_lut.0, endy4 - starty4, (*f).bitdepth_max, ); } - x += 1; } } @@ -393,7 +391,7 @@ unsafe fn filter_plane_cols_y( unsafe fn filter_plane_rows_y( f: *const Rav1dFrameContext, have_top: c_int, - mut lvl: *const [u8; 4], + lvl: &[[u8; 4]], b4_stride: ptrdiff_t, mask: *const [[u16; 2]; 3], mut dst: *mut BD::Pixel, @@ -403,8 +401,7 @@ unsafe fn filter_plane_rows_y( endy4: c_int, ) { let dsp: *const Rav1dDSPContext = (*f).dsp; - let mut y = starty4; - while y < endy4 { + for (y, lvl) in (starty4..endy4).zip(lvl.chunks(b4_stride as usize)) { if !(have_top == 0 && y == 0) { let vmask: [u32; 4] = [ (*mask.offset(y as isize))[0][0] as c_uint @@ -419,16 +416,14 @@ unsafe fn filter_plane_rows_y( dst.cast(), ls, vmask.as_ptr(), - &*(*lvl.offset(0)).as_ptr().offset(1) as *const u8 as *const [u8; 4], + 
lvl[0][1..].as_ptr() as *const [u8; 4], b4_stride, &(*f).lf.lim_lut.0, w, (*f).bitdepth_max, ); } - y += 1; dst = dst.offset(4 * BD::pxstride(ls as usize) as isize); - lvl = lvl.offset(b4_stride as isize); } } @@ -436,7 +431,7 @@ unsafe fn filter_plane_rows_y( unsafe fn filter_plane_cols_uv( f: *const Rav1dFrameContext, have_left: c_int, - lvl: *const [u8; 4], + lvl: &[[u8; 4]], b4_stride: ptrdiff_t, mask: *const [[u16; 2]; 2], u: *mut BD::Pixel, @@ -448,8 +443,7 @@ unsafe fn filter_plane_cols_uv( ss_ver: c_int, ) { let dsp: *const Rav1dDSPContext = (*f).dsp; - let mut x = 0; - while x < w { + for x in 0..w { if !(have_left == 0 && x == 0) { let mut hmask: [u32; 3] = [0; 3]; if starty4 == 0 { @@ -468,7 +462,7 @@ unsafe fn filter_plane_cols_uv( u.offset((x * 4) as isize).cast(), ls, hmask.as_mut_ptr(), - &*(*lvl.offset(x as isize)).as_ptr().offset(2) as *const u8 as *const [u8; 4], + lvl[x as usize][2..].as_ptr() as *const [u8; 4], b4_stride, &(*f).lf.lim_lut.0, endy4 - starty4, @@ -478,14 +472,13 @@ unsafe fn filter_plane_cols_uv( v.offset((x * 4) as isize).cast(), ls, hmask.as_mut_ptr(), - &*(*lvl.offset(x as isize)).as_ptr().offset(3) as *const u8 as *const [u8; 4], + lvl[x as usize][3..].as_ptr() as *const [u8; 4], b4_stride, &(*f).lf.lim_lut.0, endy4 - starty4, (*f).bitdepth_max, ); } - x += 1; } } @@ -493,7 +486,7 @@ unsafe fn filter_plane_cols_uv( unsafe fn filter_plane_rows_uv( f: *const Rav1dFrameContext, have_top: c_int, - mut lvl: *const [u8; 4], + lvl: &[[u8; 4]], b4_stride: ptrdiff_t, mask: *const [[u16; 2]; 2], u: *mut BD::Pixel, @@ -506,8 +499,7 @@ unsafe fn filter_plane_rows_uv( ) { let dsp: *const Rav1dDSPContext = (*f).dsp; let mut off_l: ptrdiff_t = 0 as c_int as ptrdiff_t; - let mut y = starty4; - while y < endy4 { + for (y, lvl) in (starty4..endy4).zip(lvl.chunks(b4_stride as usize)) { if !(have_top == 0 && y == 0) { let vmask: [u32; 3] = [ (*mask.offset(y as isize))[0][0] as c_uint @@ -520,7 +512,7 @@ unsafe fn filter_plane_rows_uv( 
u.offset(off_l as isize).cast(), ls, vmask.as_ptr(), - &*(*lvl.offset(0)).as_ptr().offset(2) as *const u8 as *const [u8; 4], + lvl[0][2..].as_ptr() as *const [u8; 4], b4_stride, &(*f).lf.lim_lut.0, w, @@ -530,22 +522,20 @@ unsafe fn filter_plane_rows_uv( v.offset(off_l as isize).cast(), ls, vmask.as_ptr(), - &*(*lvl.offset(0)).as_ptr().offset(3) as *const u8 as *const [u8; 4], + lvl[0][3..].as_ptr() as *const [u8; 4], b4_stride, &(*f).lf.lim_lut.0, w, (*f).bitdepth_max, ); } - y += 1; off_l += 4 * BD::pxstride(ls as usize) as isize; - lvl = lvl.offset(b4_stride as isize); } } pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( f: *const Rav1dFrameContext, - p: *const *mut BD::Pixel, + p: &[*mut BD::Pixel; 3], lflvl: *mut Av1Filter, sby: c_int, start_of_tile_row: c_int, @@ -697,16 +687,14 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( } } let mut ptr: *mut BD::Pixel; - let mut level_ptr: *mut [u8; 4] = - ((*f).lf.level).offset((*f).b4_stride * sby as isize * sbsz as isize); - ptr = *p.offset(0); + let level_ptr = &(*f).lf.level[((*f).b4_stride * sby as isize * sbsz as isize) as usize..]; + ptr = p[0]; have_left = 0 as c_int; - x = 0 as c_int; - while x < (*f).sb128w { + for (x, level_ptr) in (0..(*f).sb128w).zip(level_ptr.chunks(32)) { filter_plane_cols_y::( f, have_left, - level_ptr as *const [u8; 4], + level_ptr, (*f).b4_stride, ((*lflvl.offset(x as isize)).filter_y[0]).as_mut_ptr() as *const [[u16; 2]; 3], ptr, @@ -715,106 +703,98 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( starty4, endy4 as c_int, ); - x += 1; have_left = 1 as c_int; ptr = ptr.offset(128); - level_ptr = level_ptr.offset(32); } if frame_hdr.loopfilter.level_u == 0 && frame_hdr.loopfilter.level_v == 0 { return; } let mut uv_off: ptrdiff_t; - level_ptr = ((*f).lf.level).offset((*f).b4_stride * (sby * sbsz >> ss_ver) as isize); - uv_off = 0 as c_int as ptrdiff_t; + let level_ptr = &(*f).lf.level[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; have_left = 0 as c_int; - x 
= 0 as c_int; - while x < (*f).sb128w { + uv_off = 0; + for (x, level_ptr) in (0..(*f).sb128w).zip(level_ptr.chunks(32 >> ss_hor)) { filter_plane_cols_uv::( f, have_left, - level_ptr as *const [u8; 4], + level_ptr, (*f).b4_stride, ((*lflvl.offset(x as isize)).filter_uv[0]).as_mut_ptr() as *const [[u16; 2]; 2], - &mut *(*p.offset(1)).offset(uv_off as isize), - &mut *(*p.offset(2)).offset(uv_off as isize), + &mut *p[1].offset(uv_off as isize), + &mut *p[2].offset(uv_off as isize), (*f).cur.stride[1], cmp::min(32 as c_int, (*f).w4 - x * 32) + ss_hor >> ss_hor, starty4 >> ss_ver, uv_endy4 as c_int, ss_ver, ); - x += 1; have_left = 1 as c_int; uv_off += 128 >> ss_hor; - level_ptr = level_ptr.offset((32 >> ss_hor) as isize); } } pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( f: *const Rav1dFrameContext, - p: *const *mut BD::Pixel, + p: &[*mut BD::Pixel; 3], lflvl: *mut Av1Filter, sby: c_int, ) { - let mut x; + // Don't filter outside the frame let have_top = (sby > 0) as c_int; let seq_hdr = &***(*f).seq_hdr.as_ref().unwrap(); let is_sb64 = (seq_hdr.sb128 == 0) as c_int; let starty4 = (sby & is_sb64) << 4; let sbsz = 32 >> is_sb64; - let ss_ver = - ((*f).cur.p.layout as c_uint == Rav1dPixelLayout::I420 as c_int as c_uint) as c_int; - let ss_hor = - ((*f).cur.p.layout as c_uint != Rav1dPixelLayout::I444 as c_int as c_uint) as c_int; + let ss_ver = ((*f).cur.p.layout == Rav1dPixelLayout::I420) as c_int; + let ss_hor = ((*f).cur.p.layout != Rav1dPixelLayout::I444) as c_int; let endy4: c_uint = (starty4 + cmp::min((*f).h4 - sby * sbsz, sbsz)) as c_uint; let uv_endy4: c_uint = endy4.wrapping_add(ss_ver as c_uint) >> ss_ver; + let mut ptr: *mut BD::Pixel; - let mut level_ptr: *mut [u8; 4] = - ((*f).lf.level).offset((*f).b4_stride * sby as isize * sbsz as isize); - ptr = *p.offset(0); - x = 0 as c_int; - while x < (*f).sb128w { + let mut level_ptr = &(*f).lf.level[((*f).b4_stride * sby as isize * sbsz as isize) as usize..]; + ptr = p[0]; + for x in 0..(*f).sb128w { 
filter_plane_rows_y::( f, have_top, - level_ptr as *const [u8; 4], + level_ptr, (*f).b4_stride, ((*lflvl.offset(x as isize)).filter_y[1]).as_mut_ptr() as *const [[u16; 2]; 3], ptr, (*f).cur.stride[0], - cmp::min(32 as c_int, (*f).w4 - x * 32), + cmp::min(32, (*f).w4 - x * 32), starty4, endy4 as c_int, ); - x += 1; ptr = ptr.offset(128); - level_ptr = level_ptr.offset(32); + level_ptr = &level_ptr[32..]; } + let frame_hdr = &***(*f).frame_hdr.as_ref().unwrap(); if frame_hdr.loopfilter.level_u == 0 && frame_hdr.loopfilter.level_v == 0 { return; } + let mut uv_off: ptrdiff_t; - level_ptr = ((*f).lf.level).offset((*f).b4_stride * (sby * sbsz >> ss_ver) as isize); - uv_off = 0 as c_int as ptrdiff_t; - x = 0 as c_int; - while x < (*f).sb128w { + let mut level_ptr = + &(*f).lf.level[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; + uv_off = 0; + for x in 0..(*f).sb128w { filter_plane_rows_uv::( f, have_top, - level_ptr as *const [u8; 4], + level_ptr, (*f).b4_stride, ((*lflvl.offset(x as isize)).filter_uv[1]).as_mut_ptr() as *const [[u16; 2]; 2], - &mut *(*p.offset(1)).offset(uv_off as isize), - &mut *(*p.offset(2)).offset(uv_off as isize), + &mut *p[1].offset(uv_off as isize), + &mut *p[2].offset(uv_off as isize), (*f).cur.stride[1], cmp::min(32 as c_int, (*f).w4 - x * 32) + ss_hor >> ss_hor, starty4 >> ss_ver, uv_endy4 as c_int, ss_hor, ); - x += 1; uv_off += 128 >> ss_hor; - level_ptr = level_ptr.offset((32 >> ss_hor) as isize); + level_ptr = &level_ptr[32 >> ss_hor..]; } } diff --git a/src/lf_mask.rs b/src/lf_mask.rs index 99c099dd6..77d9af515 100644 --- a/src/lf_mask.rs +++ b/src/lf_mask.rs @@ -355,7 +355,7 @@ fn mask_edges_chroma( pub(crate) unsafe fn rav1d_create_lf_mask_intra( lflvl: &mut Av1Filter, - level_cache: *mut [u8; 4], + level_cache: &mut [[u8; 4]], b4_stride: ptrdiff_t, filter_level: &[[[u8; 2]; 8]; 4], bx: c_int, @@ -381,10 +381,6 @@ pub(crate) unsafe fn rav1d_create_lf_mask_intra( let by4 = by & 31; if bw4 != 0 && bh4 != 0 { - // TODO: 
Remove when `level_cache` is already a slice coming from a `Vec`. - // That refactor is complex, though, so for now we make it a slice by how elements are accessed. - let level_cache_len = (by * b4_stride + bx) + ((bh4 - 1) * b4_stride + bw4); - let level_cache = std::slice::from_raw_parts_mut(level_cache, level_cache_len); let offset = by * b4_stride + bx; for y in 0..bh4 { let offset = offset + y * b4_stride; @@ -420,11 +416,6 @@ pub(crate) unsafe fn rav1d_create_lf_mask_intra( let cbx4 = bx4 >> ss_hor; let cby4 = by4 >> ss_ver; - // TODO: Remove when `level_cache` is already a slice coming from a `Vec`. - // That refactor is complex, though, so for now we make it a slice by how elements are accessed. - let level_cache_len = - ((by >> ss_ver) * b4_stride + (bx >> ss_hor)) + ((cbh4 - 1) * b4_stride + cbw4); - let level_cache = std::slice::from_raw_parts_mut(level_cache, level_cache_len); let offset = (by >> ss_ver) * b4_stride + (bx >> ss_hor); for y in 0..cbh4 { let offset = offset + y * b4_stride; @@ -451,7 +442,7 @@ pub(crate) unsafe fn rav1d_create_lf_mask_intra( pub(crate) unsafe fn rav1d_create_lf_mask_inter( lflvl: &mut Av1Filter, - level_cache: *mut [u8; 4], + level_cache: &mut [[u8; 4]], b4_stride: ptrdiff_t, filter_level: &[[[u8; 2]; 8]; 4], r#ref: usize, @@ -482,10 +473,6 @@ pub(crate) unsafe fn rav1d_create_lf_mask_inter( let by4 = by & 31; if bw4 != 0 && bh4 != 0 { - // TODO: Remove when `level_cache` is already a slice coming from a `Vec`. - // That refactor is complex, though, so for now we make it a slice by how elements are accessed. 
- let level_cache_len = (by * b4_stride + bx) + ((bh4 - 1) * b4_stride + bw4); - let level_cache = std::slice::from_raw_parts_mut(level_cache, level_cache_len); let offset = by * b4_stride + bx; for y in 0..bh4 { let offset = offset + y * b4_stride; @@ -532,11 +519,6 @@ pub(crate) unsafe fn rav1d_create_lf_mask_inter( let cbx4 = bx4 >> ss_hor; let cby4 = by4 >> ss_ver; - // TODO: Remove when `level_cache` is already a slice coming from a `Vec`. - // That refactor is complex, though, so for now we make it a slice by how elements are accessed. - let level_cache_len = - ((by >> ss_ver) * b4_stride + (bx >> ss_hor)) + ((cbh4 - 1) * b4_stride + cbw4); - let level_cache = std::slice::from_raw_parts_mut(level_cache, level_cache_len); let offset = (by >> ss_ver) * b4_stride + (bx >> ss_hor); for y in 0..cbh4 { let offset = offset + y * b4_stride; diff --git a/src/lib.rs b/src/lib.rs index e89c2327c..750c38da2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -979,7 +979,7 @@ impl Drop for Rav1dContext { let _ = mem::take(&mut (*f).tiles); free((*f).lf.mask as *mut c_void); free((*f).lf.lr_mask as *mut c_void); - free((*f).lf.level as *mut c_void); + let _ = mem::take(&mut (*f).lf.level); free((*f).lf.tx_lpf_right_edge[0] as *mut c_void); free((*f).lf.start_of_tile_row as *mut c_void); rav1d_refmvs_clear(&mut (*f).rf); diff --git a/src/recon.rs b/src/recon.rs index b2cac4fc3..f45166102 100644 --- a/src/recon.rs +++ b/src/recon.rs @@ -4484,7 +4484,7 @@ pub(crate) unsafe fn rav1d_filter_sbrow_deblock_cols( (f.lf.mask).offset(((sby >> (seq_hdr.sb128 == 0) as c_int) * f.sb128w) as isize); rav1d_loopfilter_sbrow_cols::( f, - p.as_ptr(), + &p, mask, sby, *(f.lf.start_of_tile_row).offset(sby as isize) as c_int, @@ -4516,7 +4516,7 @@ pub(crate) unsafe fn rav1d_filter_sbrow_deblock_rows( if c.inloop_filters as c_uint & RAV1D_INLOOPFILTER_DEBLOCK as c_int as c_uint != 0 && (frame_hdr.loopfilter.level_y[0] != 0 || frame_hdr.loopfilter.level_y[1] != 0) { - rav1d_loopfilter_sbrow_rows::(f, 
p.as_ptr(), mask, sby); + rav1d_loopfilter_sbrow_rows::<BD>(f, &p, mask, sby); } if seq_hdr.cdef != 0 || f.lf.restore_planes != 0 { rav1d_copy_lpf::<BD>(c, f, p.as_ptr(), sby);