diff --git a/src/lr_apply.rs b/src/lr_apply.rs
index 4b386b463..8149057f6 100644
--- a/src/lr_apply.rs
+++ b/src/lr_apply.rs
@@ -30,8 +30,8 @@ pub const LR_RESTORE_Y: LrRestorePlanes = 1;
 unsafe fn lr_stripe<BD: BitDepth>(
     c: &Rav1dContext,
     f: &Rav1dFrameData,
-    mut p: *mut BD::Pixel,
-    mut left: *const [BD::Pixel; 4],
+    mut p: &mut [BD::Pixel],
+    left: &[[BD::Pixel; 4]; 128 + 8],
     x: c_int,
     mut y: c_int,
     plane: c_int,
@@ -41,24 +41,20 @@ unsafe fn lr_stripe<BD: BitDepth>(
     mut edges: LrEdgeFlags,
 ) {
     let seq_hdr = &***f.seq_hdr.as_ref().unwrap();
-    let dsp: *const Rav1dDSPContext = f.dsp;
+    let dsp: &Rav1dDSPContext = &*f.dsp;
     let chroma = (plane != 0) as c_int;
-    let ss_ver = chroma
-        & (f.sr_cur.p.p.layout as c_uint == Rav1dPixelLayout::I420 as c_int as c_uint) as c_int;
+    let ss_ver = chroma & (f.sr_cur.p.p.layout == Rav1dPixelLayout::I420) as c_int;
     let stride: ptrdiff_t = f.sr_cur.p.stride[chroma as usize];
-    let sby =
-        y + (if y != 0 {
-            (8 as c_int) << ss_ver
-        } else {
-            0 as c_int
-        }) >> 6 - ss_ver + seq_hdr.sb128;
+    let sby = y + (if y != 0 { 8 << ss_ver } else { 0 }) >> 6 - ss_ver + seq_hdr.sb128;
     let have_tt = (c.tc.len() > 1) as c_int;
-    let mut lpf: *const BD::Pixel = (f.lf.lr_lpf_line[plane as usize] as *mut BD::Pixel)
-        .offset(
-            (have_tt * (sby * ((4 as c_int) << seq_hdr.sb128) - 4)) as isize
-                * BD::pxstride(stride as usize) as isize,
-        )
-        .offset(x as isize);
+    let lpf_stride = BD::pxstride(stride as usize) as isize;
+    let lpf_plane_sz = BD::pxstride(f.lf.lr_buf_plane_sz[(plane != 0) as usize] as usize) as isize;
+    let mut lpf_offset = cmp::max(lpf_stride - lpf_plane_sz, 0);
+    let lpf = &slice::from_raw_parts(
+        (f.lf.lr_lpf_line[plane as usize] as *const BD::Pixel).offset(-lpf_offset),
+        lpf_plane_sz.unsigned_abs(),
+    );
+    lpf_offset += (have_tt * (sby * (4 << seq_hdr.sb128) - 4)) as isize * lpf_stride + x as isize;
     // The first stripe of the frame is shorter by 8 luma pixel rows.
     let mut stripe_h = cmp::min(64 - 8 * (y == 0) as c_int >> ss_ver, row_h - y);
     let lr_fn: looprestorationfilter_fn;
@@ -66,111 +62,86 @@ unsafe fn lr_stripe<BD: BitDepth>(
     let mut params: LooprestorationParams = LooprestorationParams {
         filter: [[0; 8]; 2].into(),
     };
     if lr.r#type as c_int == RAV1D_RESTORATION_WIENER as c_int {
-        let filter: *mut [i16; 8] = (params.filter.0).as_mut_ptr();
-        let ref mut fresh0 = (*filter.offset(0))[6];
-        *fresh0 = lr.filter_h[0] as i16;
-        (*filter.offset(0))[0] = *fresh0;
-        let ref mut fresh1 = (*filter.offset(0))[5];
-        *fresh1 = lr.filter_h[1] as i16;
-        (*filter.offset(0))[1] = *fresh1;
-        let ref mut fresh2 = (*filter.offset(0))[4];
-        *fresh2 = lr.filter_h[2] as i16;
-        (*filter.offset(0))[2] = *fresh2;
-        (*filter.offset(0))[3] = (-((*filter.offset(0))[0] as c_int
-            + (*filter.offset(0))[1] as c_int
-            + (*filter.offset(0))[2] as c_int)
-            * 2) as i16;
-        let ref mut fresh3 = (*filter.offset(0))[3];
+        let filter = &mut params.filter.0;
+        filter[0][0] = lr.filter_h[0] as i16;
+        filter[0][1] = lr.filter_h[1] as i16;
+        filter[0][2] = lr.filter_h[2] as i16;
+        filter[0][6] = lr.filter_h[0] as i16;
+        filter[0][5] = lr.filter_h[1] as i16;
+        filter[0][4] = lr.filter_h[2] as i16;
+        filter[0][3] = -(filter[0][0] + filter[0][1] + filter[0][2]) * 2;
         if BD::BITDEPTH != 8 {
             // For 8-bit SIMD it's beneficial to handle the +128 separately
             // in order to avoid overflows.
-            *fresh3 = (*fresh3 + 128) as i16;
+            filter[0][3] += 128;
         }
-        let ref mut fresh4 = (*filter.offset(1))[6];
-        *fresh4 = lr.filter_v[0] as i16;
-        (*filter.offset(1))[0] = *fresh4;
-        let ref mut fresh5 = (*filter.offset(1))[5];
-        *fresh5 = lr.filter_v[1] as i16;
-        (*filter.offset(1))[1] = *fresh5;
-        let ref mut fresh6 = (*filter.offset(1))[4];
-        *fresh6 = lr.filter_v[2] as i16;
-        (*filter.offset(1))[2] = *fresh6;
-        (*filter.offset(1))[3] = (128 as c_int
-            - ((*filter.offset(1))[0] as c_int
-                + (*filter.offset(1))[1] as c_int
-                + (*filter.offset(1))[2] as c_int)
-            * 2) as i16;
-        lr_fn = (*dsp).lr.wiener[((*filter.offset(0))[0] as c_int | (*filter.offset(1))[0] as c_int
-            == 0) as c_int as usize];
+
+        filter[1][0] = lr.filter_v[0] as i16;
+        filter[1][1] = lr.filter_v[1] as i16;
+        filter[1][2] = lr.filter_v[2] as i16;
+        filter[1][6] = lr.filter_v[0] as i16;
+        filter[1][5] = lr.filter_v[1] as i16;
+        filter[1][4] = lr.filter_v[2] as i16;
+        filter[1][3] = 128 - (filter[1][0] + filter[1][1] + filter[1][2]) * 2;
+
+        lr_fn = dsp.lr.wiener[((filter[0][0] | filter[1][0]) == 0) as usize];
     } else {
-        if !(lr.r#type as c_int == RAV1D_RESTORATION_SGRPROJ as c_int) {
-            unreachable!();
-        }
-        let sgr_params: *const u16 = (dav1d_sgr_params[lr.sgr_idx as usize]).as_ptr();
-        params.sgr.s0 = *sgr_params.offset(0) as u32;
-        params.sgr.s1 = *sgr_params.offset(1) as u32;
+        assert_eq!(lr.r#type, RAV1D_RESTORATION_SGRPROJ);
+        let sgr_params = &dav1d_sgr_params[lr.sgr_idx as usize];
+        params.sgr.s0 = sgr_params[0] as u32;
+        params.sgr.s1 = sgr_params[1] as u32;
         params.sgr.w0 = lr.sgr_weights[0] as i16;
-        params.sgr.w1 =
-            (128 as c_int - (lr.sgr_weights[0] as c_int + lr.sgr_weights[1] as c_int)) as i16;
-        lr_fn = (*dsp).lr.sgr[((*sgr_params.offset(0) != 0) as c_int
-            + (*sgr_params.offset(1) != 0) as c_int * 2
-            - 1) as usize];
+        params.sgr.w1 = 128 - (lr.sgr_weights[0] as i16 + lr.sgr_weights[1] as i16);
+        lr_fn = dsp.lr.sgr[(sgr_params[0] != 0) as usize + (sgr_params[1] != 0) as usize * 2 - 1];
     }
+    let mut left = &left[..];
     while y + stripe_h <= row_h {
         // Change the HAVE_BOTTOM bit in edges to (sby + 1 != f->sbh || y + stripe_h != row_h)
-        edges = ::core::mem::transmute::<c_uint, LrEdgeFlags>(
-            edges as c_uint
-                ^ (-((sby + 1 != f.sbh || y + stripe_h != row_h) as c_int) as c_uint
-                    ^ edges as c_uint)
-                    & LR_HAVE_BOTTOM as c_int as c_uint,
-        );
+        edges ^= (-((sby + 1 != f.sbh || y + stripe_h != row_h) as c_int) as LrEdgeFlags ^ edges)
+            & LR_HAVE_BOTTOM;
         lr_fn(
-            p.cast(),
+            p.as_mut_ptr().cast(),
             stride,
-            left.cast(),
-            lpf.cast(),
+            left.as_ptr().cast(),
+            lpf.as_ptr().offset(lpf_offset).cast(),
             unit_w,
             stripe_h,
             &mut params,
             edges,
             f.bitdepth_max,
         );
-        left = left.offset(stripe_h as isize);
+        left = &left[stripe_h as usize..];
         y += stripe_h;
-        p = p.offset(stripe_h as isize * BD::pxstride(stride as usize) as isize);
-        edges = ::core::mem::transmute::<c_uint, LrEdgeFlags>(
-            edges as c_uint | LR_HAVE_TOP as c_int as c_uint,
-        );
+        let p_offset = stripe_h as isize * BD::pxstride(stride as usize) as isize;
+        edges |= LR_HAVE_TOP;
         stripe_h = cmp::min(64 >> ss_ver, row_h - y);
         if stripe_h == 0 {
             break;
         }
-        lpf = lpf.offset(4 * BD::pxstride(stride as usize) as isize);
+        p = &mut p[p_offset as usize..];
+
+        lpf_offset += 4 * lpf_stride;
     }
 }
 
-unsafe fn backup4xU<BD: BitDepth>(
-    mut dst: *mut [BD::Pixel; 4],
-    mut src: *const BD::Pixel,
+fn backup4xU<BD: BitDepth>(
+    dst: &mut [[BD::Pixel; 4]; 128 + 8],
+    src: &[BD::Pixel],
     src_stride: ptrdiff_t,
-    mut u: c_int,
+    u: c_int,
 ) {
-    while u > 0 {
-        BD::pixel_copy(
-            slice::from_raw_parts_mut(&mut *dst as *mut BD::Pixel, 4),
-            slice::from_raw_parts(&*src as *const BD::Pixel, 4),
-            4,
-        );
-        u -= 1;
-        dst = dst.offset(1);
-        src = src.offset(BD::pxstride(src_stride as usize) as isize);
+    for (src, dst) in src
+        .chunks(BD::pxstride(src_stride as usize))
+        .zip(&mut dst[..u as usize])
+    {
+        BD::pixel_copy(dst, src, 4);
     }
 }
 
 unsafe fn lr_sbrow<BD: BitDepth>(
     c: &Rav1dContext,
     f: &Rav1dFrameData,
-    mut p: *mut BD::Pixel,
+    mut p: &mut [BD::Pixel],
     y: c_int,
     w: c_int,
     h: c_int,
@@ -178,25 +149,32 @@
     plane: c_int,
 ) {
     let chroma = (plane != 0) as c_int;
-    let ss_ver = chroma
-        & (f.sr_cur.p.p.layout as c_uint == Rav1dPixelLayout::I420 as c_int as c_uint) as c_int;
-    let ss_hor = chroma
-        & (f.sr_cur.p.p.layout as c_uint != Rav1dPixelLayout::I444 as c_int as c_uint) as c_int;
+    let ss_ver = chroma & (f.sr_cur.p.p.layout == Rav1dPixelLayout::I420) as c_int;
+    let ss_hor = chroma & (f.sr_cur.p.p.layout != Rav1dPixelLayout::I444) as c_int;
     let p_stride: ptrdiff_t = f.sr_cur.p.stride[chroma as usize];
     let frame_hdr = &***f.frame_hdr.as_ref().unwrap();
-    let unit_size_log2 = frame_hdr.restoration.unit_size[(plane != 0) as c_int as usize];
+    let unit_size_log2 = frame_hdr.restoration.unit_size[(plane != 0) as usize];
     let unit_size = (1 as c_int) << unit_size_log2;
     let half_unit_size = unit_size >> 1;
     let max_unit_size = unit_size + half_unit_size;
+
+    // Y coordinate of the sbrow (y is 8 luma pixel rows above row_y)
     let row_y = y + (8 >> ss_ver) * (y != 0) as c_int;
+
+    // FIXME This is an ugly hack to lookup the proper AV1Filter unit for
+    // chroma planes. Question: For Multithreaded decoding, is it better
+    // to store the chroma LR information with collocated Luma information?
+    // In other words. For a chroma restoration unit locate at 128,128 and
+    // with a 4:2:0 chroma subsampling, do we store the filter information at
+    // the AV1Filter unit located at (128,128) or (256,256)
+    // TODO Support chroma subsampling.
     let shift_hor = 7 - ss_hor;
-    let mut pre_lr_border: Align16<[[[BD::Pixel; 4]; 136]; 2]> = Align16([[[0.into(); 4]; 136]; 2]);
+
+    // maximum sbrow height is 128 + 8 rows offset
+    let mut pre_lr_border: Align16<[[[BD::Pixel; 4]; 128 + 8]; 2]> =
+        Align16([[[0.into(); 4]; 128 + 8]; 2]);
     let mut lr = [Av1RestorationUnit::default(); 2];
-    let mut edges: LrEdgeFlags = ((if y > 0 {
-        LR_HAVE_TOP as c_int
-    } else {
-        0 as c_int
-    }) | LR_HAVE_RIGHT as c_int) as LrEdgeFlags;
+    let mut edges: LrEdgeFlags = (if y > 0 { LR_HAVE_TOP } else { 0 }) | LR_HAVE_RIGHT;
     let mut aligned_unit_pos = row_y & !(unit_size - 1);
     if aligned_unit_pos != 0 && aligned_unit_pos + half_unit_size > h {
         aligned_unit_pos -= unit_size;
     }
@@ -205,31 +183,29 @@ unsafe fn lr_sbrow<BD: BitDepth>(
     let sb_idx = (aligned_unit_pos >> 7) * f.sr_sb128w;
     let unit_idx = (aligned_unit_pos >> 6 & 1) << 1;
     lr[0] = (*(f.lf.lr_mask).offset(sb_idx as isize)).lr[plane as usize][unit_idx as usize];
-    let mut restore = (lr[0].r#type as c_int != RAV1D_RESTORATION_NONE as c_int) as c_int;
+    let mut restore = lr[0].r#type != RAV1D_RESTORATION_NONE;
     let mut x = 0;
-    let mut bit = 0;
+    let mut bit = false;
     while x + max_unit_size <= w {
         let next_x = x + unit_size;
         let next_u_idx = unit_idx + (next_x >> shift_hor - 1 & 1);
-        lr[(bit == 0) as c_int as usize] = (*(f.lf.lr_mask)
-            .offset((sb_idx + (next_x >> shift_hor)) as isize))
-        .lr[plane as usize][next_u_idx as usize];
-        let restore_next = (lr[(bit == 0) as c_int as usize].r#type as c_int
-            != RAV1D_RESTORATION_NONE as c_int) as c_int;
-        if restore_next != 0 {
+        lr[!bit as usize] = (*(f.lf.lr_mask).offset((sb_idx + (next_x >> shift_hor)) as isize)).lr
+            [plane as usize][next_u_idx as usize];
+        let restore_next = lr[!bit as usize].r#type != RAV1D_RESTORATION_NONE;
+        if restore_next {
             backup4xU::<BD>(
-                (pre_lr_border[bit as usize]).as_mut_ptr(),
-                p.offset(unit_size as isize).offset(-(4 as c_int as isize)),
+                &mut pre_lr_border[bit as usize],
+                &p[unit_size as usize - 4..],
                 p_stride,
                 row_h - y,
             );
         }
-        if restore != 0 {
+        if restore {
             lr_stripe::<BD>(
                 c,
                 f,
                 p,
-                (pre_lr_border[(bit == 0) as c_int as usize]).as_mut_ptr() as *const [BD::Pixel; 4],
+                &pre_lr_border[!bit as usize],
                 x,
                 y,
                 plane,
@@ -241,22 +217,18 @@
         }
         x = next_x;
         restore = restore_next;
-        p = p.offset(unit_size as isize);
-        edges = ::core::mem::transmute::<c_uint, LrEdgeFlags>(
-            edges as c_uint | LR_HAVE_LEFT as c_int as c_uint,
-        );
-        bit ^= 1 as c_int;
+        p = &mut p[unit_size as usize..];
+        edges |= LR_HAVE_LEFT;
+        bit = !bit;
     }
-    if restore != 0 {
-        edges = ::core::mem::transmute::<c_uint, LrEdgeFlags>(
-            edges as c_uint & !(LR_HAVE_RIGHT as c_int) as c_uint,
-        );
+    if restore {
+        edges &= !LR_HAVE_RIGHT;
         let unit_w = w - x;
         lr_stripe::<BD>(
             c,
             f,
             p,
-            (pre_lr_border[(bit == 0) as c_int as usize]).as_mut_ptr() as *const [BD::Pixel; 4],
+            &pre_lr_border[!bit as usize],
             x,
             y,
             plane,
@@ -271,11 +243,12 @@
 pub(crate) unsafe fn rav1d_lr_sbrow<BD: BitDepth>(
     c: &Rav1dContext,
     f: &mut Rav1dFrameData,
-    dst: *const *mut BD::Pixel,
+    dst: &mut [&mut [BD::Pixel]; 3],
+    dst_offset: &[usize; 2],
     sby: c_int,
 ) {
     let offset_y = 8 * (sby != 0) as c_int;
-    let dst_stride: *const ptrdiff_t = (f.sr_cur.p.stride).as_mut_ptr();
+    let dst_stride = &f.sr_cur.p.stride;
     let restore_planes = f.lf.restore_planes;
     let not_last = ((sby + 1) < f.sbh) as c_int;
     let seq_hdr = &***f.seq_hdr.as_ref().unwrap();
@@ -288,53 +261,50 @@ pub(crate) unsafe fn rav1d_lr_sbrow<BD: BitDepth>(
         lr_sbrow::<BD>(
             c,
             f,
-            (*dst.offset(0)).offset(
-                -(offset_y as isize * BD::pxstride(*dst_stride.offset(0) as usize) as isize),
-            ),
+            &mut dst[0][dst_offset[0]
+                - (offset_y as isize * BD::pxstride(dst_stride[0] as usize) as isize) as usize..],
             y_stripe,
             w,
             h,
             row_h,
-            0 as c_int,
+            0,
         );
     }
-    if restore_planes & (LR_RESTORE_U as c_int | LR_RESTORE_V as c_int) != 0 {
-        let ss_ver =
-            (f.sr_cur.p.p.layout as c_uint == Rav1dPixelLayout::I420 as c_int as c_uint) as c_int;
-        let ss_hor =
-            (f.sr_cur.p.p.layout as c_uint != Rav1dPixelLayout::I444 as c_int as c_uint) as c_int;
-        let h_0 = f.sr_cur.p.p.h + ss_ver >> ss_ver;
-        let w_0 = f.sr_cur.p.p.w + ss_hor >> ss_hor;
-        let next_row_y_0 = (sby + 1) << 6 - ss_ver + seq_hdr.sb128;
-        let row_h_0 = cmp::min(next_row_y_0 - (8 >> ss_ver) * not_last, h_0);
+    if restore_planes & (LR_RESTORE_U | LR_RESTORE_V) as c_int != 0 {
+        let ss_ver = (f.sr_cur.p.p.layout == Rav1dPixelLayout::I420) as c_int;
+        let ss_hor = (f.sr_cur.p.p.layout != Rav1dPixelLayout::I444) as c_int;
+        let h = f.sr_cur.p.p.h + ss_ver >> ss_ver;
+        let w = f.sr_cur.p.p.w + ss_hor >> ss_hor;
+        let next_row_y = (sby + 1) << 6 - ss_ver + seq_hdr.sb128;
+        let row_h = cmp::min(next_row_y - (8 >> ss_ver) * not_last, h);
         let offset_uv = offset_y >> ss_ver;
-        let y_stripe_0 = (sby << 6 - ss_ver + seq_hdr.sb128) - offset_uv;
+        let y_stripe = (sby << 6 - ss_ver + seq_hdr.sb128) - offset_uv;
         if restore_planes & LR_RESTORE_U as c_int != 0 {
             lr_sbrow::<BD>(
                 c,
                 f,
-                (*dst.offset(1)).offset(
-                    -(offset_uv as isize * BD::pxstride(*dst_stride.offset(1) as usize) as isize),
-                ),
-                y_stripe_0,
-                w_0,
-                h_0,
-                row_h_0,
-                1 as c_int,
+                &mut dst[1][dst_offset[1]
+                    - (offset_uv as isize * BD::pxstride(dst_stride[1] as usize) as isize)
+                        as usize..],
+                y_stripe,
+                w,
+                h,
+                row_h,
+                1,
             );
         }
         if restore_planes & LR_RESTORE_V as c_int != 0 {
             lr_sbrow::<BD>(
                 c,
                 f,
-                (*dst.offset(2)).offset(
-                    -(offset_uv as isize * BD::pxstride(*dst_stride.offset(1) as usize) as isize),
-                ),
-                y_stripe_0,
-                w_0,
-                h_0,
-                row_h_0,
-                2 as c_int,
+                &mut dst[2][dst_offset[1]
+                    - (offset_uv as isize * BD::pxstride(dst_stride[1] as usize) as isize)
+                        as usize..],
+                y_stripe,
+                w,
+                h,
+                row_h,
+                2,
             );
         }
     }
diff --git a/src/recon.rs b/src/recon.rs
index 6d491c44e..98113bcb9 100644
--- a/src/recon.rs
+++ b/src/recon.rs
@@ -4707,15 +4707,26 @@ pub(crate) unsafe fn rav1d_filter_sbrow_lr<BD: BitDepth>(
     }
     let y = sby * f.sb_step * 4;
     let ss_ver = (f.cur.p.layout as c_uint == Rav1dPixelLayout::I420 as c_int as c_uint) as c_int;
-    let sr_p: [*mut BD::Pixel; 3] = [
-        (f.lf.sr_p[0] as *mut BD::Pixel)
-            .offset(y as isize * BD::pxstride(f.sr_cur.p.stride[0] as usize) as isize),
-        (f.lf.sr_p[1] as *mut BD::Pixel)
-            .offset(y as isize * BD::pxstride(f.sr_cur.p.stride[1] as usize) as isize >> ss_ver),
-        (f.lf.sr_p[2] as *mut BD::Pixel)
-            .offset(y as isize * BD::pxstride(f.sr_cur.p.stride[1] as usize) as isize >> ss_ver),
+    let h = (*f).sr_cur.p.p.h + 127 & !127;
+    let mut sr_p: [&mut [BD::Pixel]; 3] = [
+        slice::from_raw_parts_mut(
+            f.lf.sr_p[0] as *mut BD::Pixel,
+            (h as isize * BD::pxstride(f.sr_cur.p.stride[0] as usize) as isize) as usize,
+        ),
+        slice::from_raw_parts_mut(
+            f.lf.sr_p[1] as *mut BD::Pixel,
+            (h as isize * BD::pxstride(f.sr_cur.p.stride[1] as usize) as isize) as usize >> ss_ver,
+        ),
+        slice::from_raw_parts_mut(
+            f.lf.sr_p[2] as *mut BD::Pixel,
+            (h as isize * BD::pxstride(f.sr_cur.p.stride[1] as usize) as isize) as usize >> ss_ver,
+        ),
+    ];
+    let sr_p_offset: [usize; 2] = [
+        (y as isize * BD::pxstride(f.sr_cur.p.stride[0] as usize) as isize) as usize,
+        (y as isize * BD::pxstride(f.sr_cur.p.stride[1] as usize) as isize >> ss_ver) as usize,
     ];
-    rav1d_lr_sbrow::<BD>(c, f, sr_p.as_ptr(), sby);
+    rav1d_lr_sbrow::<BD>(c, f, &mut sr_p, &sr_p_offset, sby);
 }
 
 pub(crate) unsafe fn rav1d_filter_sbrow<BD: BitDepth>(
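
A note on the `edges` updates in `lr_stripe` and `lr_sbrow` above: the removed c2rust output spelled the HAVE_BOTTOM update as a `transmute` dance, while the new code relies on the bit operators implemented for `LrEdgeFlags`. Both forms are the same branchless "set or clear a bit according to a condition" idiom: `-(cond as c_int)` is all-ones when the condition holds and zero otherwise, so XOR-ing it against the current flags under a mask rewrites exactly the masked bit. Below is a minimal standalone sketch of that idiom on plain `u32` flags; the constant value and the function name are illustrative only, not dav1d's API:

    const LR_HAVE_BOTTOM: u32 = 1 << 3; // illustrative bit value for this sketch

    // Set or clear `mask` in `flags` depending on `cond`, without a branch.
    // `(-(cond as i32)) as u32` is all-ones (0xffff_ffff) for true and 0 for
    // false, so `(that ^ flags) & mask` is exactly the set of masked bits
    // that have to flip to reach the desired state.
    fn set_flag_branchless(flags: u32, mask: u32, cond: bool) -> u32 {
        flags ^ (((-(cond as i32)) as u32 ^ flags) & mask)
    }

    fn main() {
        assert_eq!(set_flag_branchless(0, LR_HAVE_BOTTOM, true), LR_HAVE_BOTTOM);
        assert_eq!(set_flag_branchless(LR_HAVE_BOTTOM, LR_HAVE_BOTTOM, false), 0);
        // Bits outside the mask are left untouched.
        assert_eq!(set_flag_branchless(0b101, 0b100, false), 0b001);
    }

The operator forms used in the patch (`^=`, `|=`, `&=`) presumably come from bitflag-style operator impls on `LrEdgeFlags`; the sketch only shows the arithmetic they encode.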
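The `backup4xU` rewrite drops the raw-pointer walk in favour of iterator adapters: `src.chunks(stride)` yields one stride-sized row at a time, and zipping with `&mut dst[..u]` bounds the loop to `u` rows, so the explicit `u -= 1` and pointer-bump bookkeeping disappear. A self-contained sketch of the same shape, assuming plain `u8` pixels and a made-up name (`backup4x_u`) rather than the generic `BD::Pixel` code in the patch:

    // Copy the first 4 pixels of each of the first `u` rows of a strided
    // source buffer into fixed-width 4-pixel destination rows.
    fn backup4x_u(dst: &mut [[u8; 4]], src: &[u8], src_stride: usize, u: usize) {
        for (src_row, dst_row) in src.chunks(src_stride).zip(&mut dst[..u]) {
            dst_row.copy_from_slice(&src_row[..4]);
        }
    }

    fn main() {
        let src_stride = 8;
        // Three 8-pixel rows filled with 10, 20 and 30.
        let src: Vec<u8> = (0..3u8).flat_map(|r| [(r + 1) * 10; 8]).collect();
        let mut dst = [[0u8; 4]; 3];
        backup4x_u(&mut dst, &src, src_stride, 2);
        // Only the first two rows are copied; the third stays untouched.
        assert_eq!(dst, [[10; 4], [20; 4], [0; 4]]);
    }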