From 5e5fb7b17e8e18ba90ee760334f7e63e39196a29 Mon Sep 17 00:00:00 2001 From: Nicole LeGare Date: Mon, 26 Feb 2024 16:48:07 -0800 Subject: [PATCH] `Rav1dFrameContext_lf::lr_mask`: Make into a `Vec` * Inline `setup_tile` into its call site to fix borrow conflicts. * Remove unnecessary `lr_mask_sz` field. --- src/decode.rs | 232 ++++++++++++++++++++++-------------------------- src/internal.rs | 3 +- src/lf_mask.rs | 1 + src/lib.rs | 2 +- src/lr_apply.rs | 4 +- 5 files changed, 111 insertions(+), 131 deletions(-) diff --git a/src/decode.rs b/src/decode.rs index 0b3cf9c56..ce5041515 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -153,7 +153,6 @@ use crate::src::lf_mask::rav1d_calc_lf_values; use crate::src::lf_mask::rav1d_create_lf_mask_inter; use crate::src::lf_mask::rav1d_create_lf_mask_intra; use crate::src::lf_mask::Av1Filter; -use crate::src::lf_mask::Av1Restoration; use crate::src::lf_mask::Av1RestorationUnit; use crate::src::log::Rav1dLog as _; use crate::src::loopfilter::rav1d_loop_filter_dsp_init; @@ -3839,117 +3838,12 @@ static ss_size_mul: enum_map_ty!(Rav1dPixelLayout, [u8; 2]) = enum_map!(Rav1dPix I444 => [12, 8], }); -unsafe fn setup_tile( - c: &Rav1dContext, - ts: &mut Rav1dTileState, - f: &Rav1dFrameData, - data: &[u8], - tile_row: usize, - tile_col: usize, - tile_start_off: usize, -) { - let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); - let frame_hdr = &***f.frame_hdr.as_ref().unwrap(); - - let col_sb_start = frame_hdr.tiling.col_start_sb[tile_col] as c_int; - let col_sb128_start = col_sb_start >> (seq_hdr.sb128 == 0) as c_int; - let col_sb_end = frame_hdr.tiling.col_start_sb[tile_col + 1] as c_int; - let row_sb_start = frame_hdr.tiling.row_start_sb[tile_row] as c_int; - let row_sb_end = frame_hdr.tiling.row_start_sb[tile_row + 1] as c_int; - let sb_shift = f.sb_shift; - - let size_mul = &ss_size_mul[f.cur.p.layout]; - for p in 0..2 { - ts.frame_thread[p].pal_idx = if !(f.frame_thread.pal_idx).is_empty() { - f.frame_thread.pal_idx[tile_start_off * 
size_mul[1] as usize / 4..].as_ptr() as *mut u8 - } else { - ptr::null_mut() - }; - ts.frame_thread[p].cf = if !f.frame_thread.cf.is_empty() { - f.frame_thread.cf - [(tile_start_off * size_mul[0] as usize >> (seq_hdr.hbd == 0) as c_int) as usize..] - .as_ptr() - .cast::() as *mut _ - } else { - ptr::null_mut() - }; - } - - rav1d_cdf_thread_copy(&mut ts.cdf, &f.in_cdf); - ts.last_qidx = frame_hdr.quant.yac; - ts.last_delta_lf.fill(0); - - rav1d_msac_init( - &mut ts.msac, - data.as_ptr(), - data.len(), - frame_hdr.disable_cdf_update != 0, - ); - - ts.tiling.row = tile_row as c_int; - ts.tiling.col = tile_col as c_int; - ts.tiling.col_start = col_sb_start << sb_shift; - ts.tiling.col_end = cmp::min(col_sb_end << sb_shift, f.bw); - ts.tiling.row_start = row_sb_start << sb_shift; - ts.tiling.row_end = cmp::min(row_sb_end << sb_shift, f.bh); - let diff_width = frame_hdr.size.width[0] != frame_hdr.size.width[1]; - - // Reference Restoration Unit (used for exp coding) - let (sb_idx, unit_idx) = if diff_width { - // vertical components only - ( - (ts.tiling.row_start >> 5) * f.sr_sb128w, - (ts.tiling.row_start & 16) >> 3, - ) - } else { - ( - (ts.tiling.row_start >> 5) * f.sb128w + col_sb128_start, - ((ts.tiling.row_start & 16) >> 3) + ((ts.tiling.col_start & 16) >> 4), - ) - }; - for p in 0..3 { - if !((f.lf.restore_planes >> p) & 1 != 0) { - continue; - } - - let lr_ref = if diff_width { - let ss_hor = (p != 0 && f.cur.p.layout != Rav1dPixelLayout::I444) as c_int; - let d = frame_hdr.size.super_res.width_scale_denominator; - let unit_size_log2 = frame_hdr.restoration.unit_size[(p != 0) as usize]; - let rnd = (8 << unit_size_log2) - 1; - let shift = unit_size_log2 + 3; - let x = (4 * ts.tiling.col_start * d >> ss_hor) + rnd >> shift; - let px_x = x << unit_size_log2 + ss_hor; - let u_idx = unit_idx + ((px_x & 64) >> 6); - let sb128x = px_x >> 7; - if sb128x >= f.sr_sb128w { - continue; - } - &mut (*f.lf.lr_mask.offset((sb_idx + sb128x) as isize)).lr[p][u_idx as usize] - } 
else { - &mut (*f.lf.lr_mask.offset(sb_idx as isize)).lr[p][unit_idx as usize] - }; - - *lr_ref = Av1RestorationUnit { - filter_v: [3, -7, 15], - filter_h: [3, -7, 15], - sgr_weights: [-32, 31], - ..*lr_ref - }; - ts.lr_ref[p] = *lr_ref; - } - - if c.tc.len() > 1 { - ts.progress.fill_with(|| AtomicI32::new(row_sb_start)); - } -} - unsafe fn read_restoration_info( t: &mut Rav1dTaskContext, - f: &Rav1dFrameData, lr: &mut Av1RestorationUnit, p: usize, frame_type: Rav1dRestorationType, + debug_block_info: bool, ) { let ts = &mut *t.ts; let lr_ref = ts.lr_ref[p]; @@ -4005,7 +3899,7 @@ unsafe fn read_restoration_info( lr.filter_h[2] = msac_decode_lr_subexp(ts, lr_ref.filter_h[2], 3, 17); lr.sgr_weights = lr_ref.sgr_weights; ts.lr_ref[p] = *lr; - if debug_block_info!(f, t) { + if debug_block_info { println!( "Post-lr_wiener[pl={},v[{},{},{}],h[{},{},{}]]: r={}", p, @@ -4035,7 +3929,7 @@ unsafe fn read_restoration_info( lr.filter_v = lr_ref.filter_v; lr.filter_h = lr_ref.filter_h; ts.lr_ref[p] = *lr; - if debug_block_info!(f, t) { + if debug_block_info { println!( "Post-lr_sgrproj[pl={},idx={},w[{},{}]]: r={}", p, lr.sgr_idx, lr.sgr_weights[0], lr.sgr_weights[1], ts.msac.rng, @@ -4144,7 +4038,7 @@ pub(crate) unsafe fn rav1d_decode_tile_sbrow( cdef_idx[0] = -1; t.cur_sb_cdef_idx_ptr = cdef_idx.as_mut_ptr(); } - let frame_hdr = f.frame_hdr(); + let frame_hdr = f.frame_hdr.as_ref().unwrap(); // Restoration filter for p in 0..3 { if (f.lf.restore_planes >> p) & 1 == 0 { @@ -4185,10 +4079,9 @@ pub(crate) unsafe fn rav1d_decode_tile_sbrow( let px_x = x << unit_size_log2 + ss_hor; let sb_idx = (t.by >> 5) * f.sr_sb128w + (px_x >> 7); let unit_idx = ((t.by & 16) >> 3) + ((px_x & 64) >> 6); - let lr = - &mut (*(f.lf.lr_mask).offset(sb_idx as isize)).lr[p][unit_idx as usize]; + let lr = &mut f.lf.lr_mask[sb_idx as usize].lr[p][unit_idx as usize]; - read_restoration_info(t, f, lr, p, frame_type); + read_restoration_info(t, lr, p, frame_type, debug_block_info!(f, t)); } } else { let x 
= 4 * t.bx >> ss_hor; @@ -4203,9 +4096,9 @@ pub(crate) unsafe fn rav1d_decode_tile_sbrow( } let sb_idx = (t.by >> 5) * f.sr_sb128w + (t.bx >> 5); let unit_idx = ((t.by & 16) >> 3) + ((t.bx & 16) >> 4); - let lr = &mut (*(f.lf.lr_mask).offset(sb_idx as isize)).lr[p][unit_idx as usize]; + let lr = &mut f.lf.lr_mask[sb_idx as usize].lr[p][unit_idx as usize]; - read_restoration_info(t, f, lr, p, frame_type); + read_restoration_info(t, lr, p, frame_type, debug_block_info!(f, t)); } } decode_sb(c, t, f, root_bl, c.intra_edge.root(root_bl))?; @@ -4526,16 +4419,9 @@ pub(crate) unsafe fn rav1d_decode_frame_init( f.sr_sb128w = f.sr_cur.p.p.w + 127 >> 7; let lr_mask_sz = f.sr_sb128w * f.sb128h; - if lr_mask_sz != f.lf.lr_mask_sz { - freep(&mut f.lf.lr_mask as *mut *mut Av1Restoration as *mut c_void); - f.lf.lr_mask = malloc(::core::mem::size_of::<Av1Restoration>() * lr_mask_sz as usize) - as *mut Av1Restoration; - if f.lf.lr_mask.is_null() { - f.lf.lr_mask_sz = 0; - return Err(ENOMEM); - } - f.lf.lr_mask_sz = lr_mask_sz; - } + // TODO: fallible allocation + f.lf.lr_mask + .resize_with(lr_mask_sz as usize, Default::default); f.lf.restore_planes = frame_hdr .restoration .r#type @@ -4725,7 +4611,101 @@ pub(crate) unsafe fn rav1d_decode_frame_init_cdf( }; let (cur_data, rest_data) = data.split_at(tile_sz); - setup_tile(c, ts, f, cur_data, tile_row, tile_col, tile_start_off); + let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); + + let col_sb_start = frame_hdr.tiling.col_start_sb[tile_col] as c_int; + let col_sb128_start = col_sb_start >> (seq_hdr.sb128 == 0) as c_int; + let col_sb_end = frame_hdr.tiling.col_start_sb[tile_col + 1] as c_int; + let row_sb_start = frame_hdr.tiling.row_start_sb[tile_row] as c_int; + let row_sb_end = frame_hdr.tiling.row_start_sb[tile_row + 1] as c_int; + let sb_shift = f.sb_shift; + + let size_mul = &ss_size_mul[f.cur.p.layout]; + for p in 0..2 { + ts.frame_thread[p].pal_idx = if !(f.frame_thread.pal_idx).is_empty() { + f.frame_thread.pal_idx[tile_start_off * size_mul[1] 
as usize / 4..].as_ptr() + as *mut u8 + } else { + ptr::null_mut() + }; + ts.frame_thread[p].cf = if !f.frame_thread.cf.is_empty() { + f.frame_thread.cf[(tile_start_off * size_mul[0] as usize + >> (seq_hdr.hbd == 0) as c_int) + as usize..] + .as_ptr() + .cast::() as *mut _ + } else { + ptr::null_mut() + }; + } + + rav1d_cdf_thread_copy(&mut ts.cdf, &f.in_cdf); + ts.last_qidx = frame_hdr.quant.yac; + ts.last_delta_lf.fill(0); + + rav1d_msac_init( + &mut ts.msac, + cur_data.as_ptr(), + cur_data.len(), + frame_hdr.disable_cdf_update != 0, + ); + + ts.tiling.row = tile_row as c_int; + ts.tiling.col = tile_col as c_int; + ts.tiling.col_start = col_sb_start << sb_shift; + ts.tiling.col_end = cmp::min(col_sb_end << sb_shift, f.bw); + ts.tiling.row_start = row_sb_start << sb_shift; + ts.tiling.row_end = cmp::min(row_sb_end << sb_shift, f.bh); + let diff_width = frame_hdr.size.width[0] != frame_hdr.size.width[1]; + + // Reference Restoration Unit (used for exp coding) + let (sb_idx, unit_idx) = if diff_width { + // vertical components only + ( + (ts.tiling.row_start >> 5) * f.sr_sb128w, + (ts.tiling.row_start & 16) >> 3, + ) + } else { + ( + (ts.tiling.row_start >> 5) * f.sb128w + col_sb128_start, + ((ts.tiling.row_start & 16) >> 3) + ((ts.tiling.col_start & 16) >> 4), + ) + }; + for p in 0..3 { + if !((f.lf.restore_planes >> p) & 1 != 0) { + continue; + } + + let lr_ref = if diff_width { + let ss_hor = (p != 0 && f.cur.p.layout != Rav1dPixelLayout::I444) as c_int; + let d = frame_hdr.size.super_res.width_scale_denominator; + let unit_size_log2 = frame_hdr.restoration.unit_size[(p != 0) as usize]; + let rnd = (8 << unit_size_log2) - 1; + let shift = unit_size_log2 + 3; + let x = (4 * ts.tiling.col_start * d >> ss_hor) + rnd >> shift; + let px_x = x << unit_size_log2 + ss_hor; + let u_idx = unit_idx + ((px_x & 64) >> 6); + let sb128x = px_x >> 7; + if sb128x >= f.sr_sb128w { + continue; + } + &mut f.lf.lr_mask[(sb_idx + sb128x) as usize].lr[p][u_idx as usize] + } else { + 
&mut f.lf.lr_mask[sb_idx as usize].lr[p][unit_idx as usize] + }; + + *lr_ref = Av1RestorationUnit { + filter_v: [3, -7, 15], + filter_h: [3, -7, 15], + sgr_weights: [-32, 31], + ..*lr_ref + }; + ts.lr_ref[p] = *lr_ref; + } + + if c.tc.len() > 1 { + ts.progress.fill_with(|| AtomicI32::new(row_sb_start)); + } tile_col += 1; if tile_col == cols { diff --git a/src/internal.rs b/src/internal.rs index 517c96b4b..80f444078 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -448,9 +448,8 @@ pub struct Rav1dFrameContext_frame_thread { pub struct Rav1dFrameContext_lf { pub level: Vec<[u8; 4]>, pub mask: *mut Av1Filter, - pub lr_mask: *mut Av1Restoration, + pub lr_mask: Vec<Av1Restoration>, pub mask_sz: c_int, /* w*h */ - pub lr_mask_sz: c_int, pub cdef_buf_plane_sz: [c_int; 2], /* stride*sbh*4 */ pub cdef_buf_sbh: c_int, pub lr_buf_plane_sz: [c_int; 2], /* (stride*sbh*4) << sb128 if n_tc > 1, else stride*4 */ diff --git a/src/lf_mask.rs b/src/lf_mask.rs index 77d9af515..5e5c53875 100644 --- a/src/lf_mask.rs +++ b/src/lf_mask.rs @@ -40,6 +40,7 @@ pub struct Av1Filter { pub noskip_mask: [[u16; 2]; 16], } +#[derive(Default)] #[repr(C)] pub struct Av1Restoration { pub lr: [[Av1RestorationUnit; 4]; 3], diff --git a/src/lib.rs b/src/lib.rs index 16115d880..134a77c40 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -913,7 +913,7 @@ impl Drop for Rav1dContext { free(f.a as *mut c_void); let _ = mem::take(&mut f.tiles); free(f.lf.mask as *mut c_void); - free(f.lf.lr_mask as *mut c_void); + let _ = mem::take(&mut f.lf.lr_mask); // TODO: remove when context is owned let _ = mem::take(&mut f.lf.level); free(f.lf.tx_lpf_right_edge[0] as *mut c_void); free(f.lf.start_of_tile_row as *mut c_void); diff --git a/src/lr_apply.rs b/src/lr_apply.rs index 8149057f6..0ea0cfe64 100644 --- a/src/lr_apply.rs +++ b/src/lr_apply.rs @@ -182,14 +182,14 @@ unsafe fn lr_sbrow( aligned_unit_pos <<= ss_ver; let sb_idx = (aligned_unit_pos >> 7) * f.sr_sb128w; let unit_idx = (aligned_unit_pos >> 6 & 1) << 1; - lr[0] = 
(*(f.lf.lr_mask).offset(sb_idx as isize)).lr[plane as usize][unit_idx as usize]; + lr[0] = f.lf.lr_mask[sb_idx as usize].lr[plane as usize][unit_idx as usize]; let mut restore = lr[0].r#type != RAV1D_RESTORATION_NONE; let mut x = 0; let mut bit = false; while x + max_unit_size <= w { let next_x = x + unit_size; let next_u_idx = unit_idx + (next_x >> shift_hor - 1 & 1); - lr[!bit as usize] = (*(f.lf.lr_mask).offset((sb_idx + (next_x >> shift_hor)) as isize)).lr + lr[!bit as usize] = f.lf.lr_mask[(sb_idx + (next_x >> shift_hor)) as usize].lr [plane as usize][next_u_idx as usize]; let restore_next = lr[!bit as usize].r#type != RAV1D_RESTORATION_NONE; if restore_next {