diff --git a/src/cdef_apply.rs b/src/cdef_apply.rs index 5421c1f2b..cf23e6759 100644 --- a/src/cdef_apply.rs +++ b/src/cdef_apply.rs @@ -7,7 +7,6 @@ use crate::src::cdef::CdefEdgeFlags; use crate::src::internal::Rav1dContext; use crate::src::internal::Rav1dFrameData; use crate::src::internal::Rav1dTaskContext; -use crate::src::lf_mask::Av1Filter; use bitflags::bitflags; use libc::ptrdiff_t; use std::cmp; @@ -160,7 +159,7 @@ pub(crate) unsafe fn rav1d_cdef_brow( tc: &mut Rav1dTaskContext, f: &Rav1dFrameData, p: &[*mut BD::Pixel; 3], - lflvl: *const Av1Filter, + lflvl_offset: i32, by_start: c_int, by_end: c_int, sbrow_start: bool, @@ -228,7 +227,8 @@ pub(crate) unsafe fn rav1d_cdef_brow( for sbx in 0..sb64w { let sb128x = sbx >> 1; let sb64_idx = ((by & sbsz) >> 3) + (sbx & 1); - let cdef_idx = (*lflvl.offset(sb128x as isize)).cdef_idx[sb64_idx as usize] as c_int; + let cdef_idx = + f.lf.mask[(lflvl_offset + sb128x) as usize].cdef_idx[sb64_idx as usize] as c_int; if cdef_idx == -1 || frame_hdr.cdef.y_strength[cdef_idx as usize] == 0 && frame_hdr.cdef.uv_strength[cdef_idx as usize] == 0 @@ -236,7 +236,8 @@ pub(crate) unsafe fn rav1d_cdef_brow( last_skip = true; } else { // Create a complete 32-bit mask for the sb row ahead of time. - let noskip_row = (*lflvl.offset(sb128x as isize)).noskip_mask[by_idx as usize]; + let noskip_row = + f.lf.mask[(lflvl_offset + sb128x) as usize].noskip_mask[by_idx as usize]; let noskip_mask = (noskip_row[1] as u32) << 16 | noskip_row[0] as u32; let y_lvl = frame_hdr.cdef.y_strength[cdef_idx as usize]; diff --git a/src/decode.rs b/src/decode.rs index 2648d0fbc..1a11c8ff8 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -152,7 +152,6 @@ use crate::src::lf_mask::rav1d_calc_eih; use crate::src::lf_mask::rav1d_calc_lf_values; use crate::src::lf_mask::rav1d_create_lf_mask_inter; use crate::src::lf_mask::rav1d_create_lf_mask_intra; -use crate::src::lf_mask::Av1Filter; use crate::src::lf_mask::Av1Restoration; use crate::src::lf_mask::Av1RestorationUnit; use crate::src::log::Rav1dLog as _; @@ -4129,9 +4128,7 @@ pub(crate) unsafe fn rav1d_decode_tile_sbrow( t.pal_sz_uv[1] = Default::default(); let sb128y = t.by >> 5; t.a = f.a.offset((col_sb128_start + tile_row * f.sb128w) as isize); - t.lf_mask = - f.lf.mask - .offset((sb128y * f.sb128w + col_sb128_start) as isize); + t.lf_mask = f.lf.mask[(sb128y * f.sb128w + col_sb128_start) as usize..].as_mut_ptr(); for bx in (ts.tiling.col_start..ts.tiling.col_end).step_by(sb_step as usize) { t.bx = bx; if c.flush.load(Ordering::Acquire) != 0 { @@ -4501,30 +4498,24 @@ pub(crate) unsafe fn rav1d_decode_frame_init( } // update allocation for loopfilter masks - if num_sb128 != f.lf.mask_sz { - freep(&mut f.lf.mask as *mut *mut Av1Filter as *mut c_void); - f.lf.mask = - malloc(::core::mem::size_of::() * num_sb128 as usize) as *mut Av1Filter; - // over-allocate one element (4 bytes) since some of the SIMD implementations - // index this from the level type and can thus over-read by up to 3 bytes. - f.lf.level - .resize(num_sb128 as usize * 32 * 32 + 1, [0u8; 4]); // TODO: Fallible allocation - if f.lf.mask.is_null() { - f.lf.mask_sz = 0; - return Err(ENOMEM); - } - if c.n_fc > 1 { - // TODO: Fallible allocation - f.frame_thread - .b - .resize_with(num_sb128 as usize * 32 * 32, Default::default); - // TODO: fallible allocation - f.frame_thread - .cbi - .resize_with(num_sb128 as usize * 32 * 32, Default::default); - } - f.lf.mask_sz = num_sb128; + f.lf.mask.clear(); + // TODO: Fallible allocation. + f.lf.mask.resize_with(num_sb128 as usize, Default::default); + // over-allocate one element (4 bytes) since some of the SIMD implementations + // index this from the level type and can thus over-read by up to 3 bytes. + f.lf.level + .resize(num_sb128 as usize * 32 * 32 + 1, [0u8; 4]); // TODO: Fallible allocation + if c.n_fc > 1 { + // TODO: Fallible allocation + f.frame_thread + .b + .resize_with(num_sb128 as usize * 32 * 32, Default::default); + + // TODO: fallible allocation + f.frame_thread + .cbi + .resize_with(num_sb128 as usize * 32 * 32, Default::default); } f.sr_sb128w = f.sr_cur.p.p.w + 127 >> 7; @@ -4552,7 +4543,6 @@ pub(crate) unsafe fn rav1d_decode_frame_init( f.lf.last_sharpness = frame_hdr.loopfilter.sharpness; } rav1d_calc_lf_values(&mut f.lf.lvl, &frame_hdr, &[0, 0, 0, 0]); - slice::from_raw_parts_mut(f.lf.mask, num_sb128.try_into().unwrap()).fill_with(Default::default); let ipred_edge_sz = f.sbh * f.sb128w << hbd; if ipred_edge_sz != f.ipred_edge_sz { @@ -4658,7 +4648,6 @@ pub(crate) unsafe fn rav1d_decode_frame_init( // We never dereference those pointers, so it doesn't really matter // what they point at, as long as the pointers are valid. let has_chroma = (f.cur.p.layout != Rav1dPixelLayout::I400) as usize; - f.lf.mask_ptr = f.lf.mask; f.lf.p = array::from_fn(|i| f.cur.data.data[has_chroma * i].cast()); f.lf.sr_p = array::from_fn(|i| f.sr_cur.p.data.data[has_chroma * i].cast()); diff --git a/src/internal.rs b/src/internal.rs index 517c96b4b..2e707df32 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -447,9 +447,8 @@ pub struct Rav1dFrameContext_frame_thread { #[repr(C)] pub struct Rav1dFrameContext_lf { pub level: Vec<[u8; 4]>, - pub mask: *mut Av1Filter, + pub mask: Vec, /* len = w*h */ pub lr_mask: *mut Av1Restoration, - pub mask_sz: c_int, /* w*h */ pub lr_mask_sz: c_int, pub cdef_buf_plane_sz: [c_int; 2], /* stride*sbh*4 */ pub cdef_buf_sbh: c_int, @@ -471,8 +470,6 @@ pub struct Rav1dFrameContext_lf { pub need_cdef_lpf_copy: c_int, pub p: [*mut DynPixel; 3], pub sr_p: [*mut DynPixel; 3], - pub mask_ptr: *mut Av1Filter, - pub prev_mask_ptr: *mut Av1Filter, pub restore_planes: c_int, // enum LrRestorePlanes } diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 093e3a4f2..f976d5166 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -4,7 +4,6 @@ use crate::src::env::BlockContext; use crate::src::internal::Rav1dContext; use crate::src::internal::Rav1dDSPContext; use crate::src::internal::Rav1dFrameData; -use crate::src::lf_mask::Av1Filter; use crate::src::lr_apply::LR_RESTORE_U; use crate::src::lr_apply::LR_RESTORE_V; use crate::src::lr_apply::LR_RESTORE_Y; @@ -539,12 +538,13 @@ unsafe fn filter_plane_rows_uv( } pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( - f: &Rav1dFrameData, + f: &mut Rav1dFrameData, p: &[*mut BD::Pixel; 3], - lflvl: *mut Av1Filter, + lflvl_offset: usize, sby: c_int, start_of_tile_row: c_int, ) { + let lflvl = f.lf.mask[lflvl_offset..].as_mut_ptr(); let mut have_left; let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); let is_sb64 = (seq_hdr.sb128 == 0) as c_int; @@ -730,11 +730,13 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( } pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( - f: &Rav1dFrameData, + f: &mut Rav1dFrameData, p: &[*mut BD::Pixel; 3], - lflvl: *mut Av1Filter, + lflvl_offset: usize, sby: c_int, ) { + let lflvl = f.lf.mask[lflvl_offset..].as_mut_ptr(); + // Don't filter outside the frame let have_top = sby > 0; let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); diff --git a/src/lib.rs b/src/lib.rs index 16115d880..d8d12db54 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -912,7 +912,7 @@ impl Drop for Rav1dContext { rav1d_free_aligned(f.ipred_edge[0] as *mut c_void); free(f.a as *mut c_void); let _ = mem::take(&mut f.tiles); - free(f.lf.mask as *mut c_void); + let _ = mem::take(&mut f.lf.mask); // TODO: remove when context is owned free(f.lf.lr_mask as *mut c_void); let _ = mem::take(&mut f.lf.level); free(f.lf.tx_lpf_right_edge[0] as *mut c_void); diff --git a/src/recon.rs b/src/recon.rs index 98113bcb9..4c1ee7855 100644 --- a/src/recon.rs +++ b/src/recon.rs @@ -76,7 +76,6 @@ use crate::src::levels::WHT_WHT; use crate::src::lf_apply::rav1d_copy_lpf; use crate::src::lf_apply::rav1d_loopfilter_sbrow_cols; use crate::src::lf_apply::rav1d_loopfilter_sbrow_rows; -use crate::src::lf_mask::Av1Filter; use crate::src::lr_apply::rav1d_lr_sbrow; use crate::src::msac::rav1d_msac_decode_bool_adapt; use crate::src::msac::rav1d_msac_decode_bool_equi; @@ -4539,12 +4538,11 @@ pub(crate) unsafe fn rav1d_filter_sbrow_deblock_cols( ), ]; let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); - let mask: *mut Av1Filter = - (f.lf.mask).offset(((sby >> (seq_hdr.sb128 == 0) as c_int) * f.sb128w) as isize); + let mask_offset = (sby >> (seq_hdr.sb128 == 0) as c_int) * f.sb128w; rav1d_loopfilter_sbrow_cols::( f, &p, - mask, + mask_offset as usize, sby, *(f.lf.start_of_tile_row).offset(sby as isize) as c_int, ); @@ -4569,15 +4567,16 @@ pub(crate) unsafe fn rav1d_filter_sbrow_deblock_rows( ), ]; let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); - let mask: *mut Av1Filter = - (f.lf.mask).offset(((sby >> (seq_hdr.sb128 == 0) as c_int) * f.sb128w) as isize); + let sb128 = seq_hdr.sb128; + let cdef = seq_hdr.cdef; + let mask_offset = (sby >> (sb128 == 0) as c_int) * f.sb128w; let frame_hdr = &***f.frame_hdr.as_ref().unwrap(); if c.inloop_filters.contains(Rav1dInloopFilterType::DEBLOCK) && (frame_hdr.loopfilter.level_y[0] != 0 || frame_hdr.loopfilter.level_y[1] != 0) { - rav1d_loopfilter_sbrow_rows::(f, &p, mask, sby); + rav1d_loopfilter_sbrow_rows::(f, &p, mask_offset as usize, sby); } - if seq_hdr.cdef != 0 || f.lf.restore_planes != 0 { + if cdef != 0 || f.lf.restore_planes != 0 { rav1d_copy_lpf::(c, &mut *f, p.as_ptr(), sby); } } @@ -4605,10 +4604,8 @@ pub(crate) unsafe fn rav1d_filter_sbrow_cdef( ), ]; let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); - let prev_mask: *mut Av1Filter = - (f.lf.mask).offset(((sby - 1 >> (seq_hdr.sb128 == 0) as c_int) * f.sb128w) as isize); - let mask: *mut Av1Filter = - (f.lf.mask).offset(((sby >> (seq_hdr.sb128 == 0) as c_int) * f.sb128w) as isize); + let prev_mask = (sby - 1 >> (seq_hdr.sb128 == 0) as c_int) * f.sb128w; + let mask_offset = (sby >> (seq_hdr.sb128 == 0) as c_int) * f.sb128w; let start = sby * sbsz; if sby != 0 { let ss_ver = @@ -4624,9 +4621,10 @@ pub(crate) unsafe fn rav1d_filter_sbrow_cdef( ]; rav1d_cdef_brow::(c, tc, f, &p_up, prev_mask, start - 2, start, true, sby); } + let n_blks = sbsz - 2 * ((sby + 1) < f.sbh) as c_int; let end = cmp::min(start + n_blks, f.bh); - rav1d_cdef_brow::(c, tc, f, &p, mask, start, end, false, sby); + rav1d_cdef_brow::(c, tc, f, &p, mask_offset, start, end, false, sby); } pub(crate) unsafe fn rav1d_filter_sbrow_resize(