Skip to content

Commit

Permalink
struct Rav1dFrameContext_lf: Add interior mutability to lr_line_buf
Browse files Browse the repository at this point in the history
Adds interior mutability to `lr_line_buf` by wrapping it in `DisjointMut` instead of `RwLock`
  • Loading branch information
rinon committed Apr 20, 2024
1 parent b3238e1 commit 4245a60
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 65 deletions.
30 changes: 16 additions & 14 deletions src/cdef_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::include::common::bitdepth::BPC;
use crate::include::common::intops::ulog2;
use crate::include::dav1d::headers::Rav1dPixelLayout;
use crate::src::align::Align16;
use crate::src::align::AlignedVec32;
use crate::src::align::AlignedVec64;
use crate::src::cdef::CdefEdgeFlags;
use crate::src::disjoint_mut::DisjointMut;
use crate::src::internal::Rav1dContext;
Expand Down Expand Up @@ -37,7 +37,7 @@ impl Backup2x8Flags {

/// `dst_buf` is a buffer of `BD::Pixel` elements
unsafe fn backup2lines<BD: BitDepth>(
dst_buf: &DisjointMut<AlignedVec32<u8>>,
dst_buf: &DisjointMut<AlignedVec64<u8>>,
dst_off: [usize; 3],
src: &[*mut BD::Pixel; 3],
stride: &[ptrdiff_t; 2],
Expand Down Expand Up @@ -327,8 +327,6 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
let mut offset: ptrdiff_t;
let st_y: bool;

let lr_line_buf_lock = f.lf.lr_line_buf.read().unwrap();
let lr_line_buf = BD::cast_pixel_slice(&lr_line_buf_lock);
if !have_tt {
st_y = true;
} else if sbrow_start && by == by_start {
Expand All @@ -341,7 +339,10 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
} else {
offset = (sby * ((4 as c_int) << sb128) - 4) as isize * y_stride
+ (bx * 4) as isize;
top = lr_line_buf.as_ptr().add(f.lf.lr_lpf_line[0]).offset(offset);
top = &*f
.lf
.lr_line_buf
.element_as((f.lf.lr_lpf_line[0] as isize + offset) as usize);
}
bot = bptrs[0].offset(8 * y_stride as isize);
st_y = false;
Expand All @@ -359,7 +360,10 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
} else {
let line = sby * ((4 as c_int) << sb128) + 4 * sb128 + 2;
offset = line as isize * y_stride + (bx * 4) as isize;
bot = lr_line_buf.as_ptr().add(f.lf.lr_lpf_line[0]).offset(offset);
bot = &*f
.lf
.lr_line_buf
.element_as((f.lf.lr_lpf_line[0] as isize + offset) as usize);
}
st_y = false;
} else {
Expand Down Expand Up @@ -431,10 +435,9 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
let line_0 = sby * ((4 as c_int) << sb128) - 4;
offset = line_0 as isize * uv_stride
+ (bx * 4 >> ss_hor) as isize;
top = lr_line_buf
.as_ptr()
.add(f.lf.lr_lpf_line[pl])
.offset(offset);
top = &*f.lf.lr_line_buf.element_as(
(f.lf.lr_lpf_line[pl] as isize + offset) as usize,
);
}
bot = bptrs[pl].offset(((8 >> ss_ver) * uv_stride) as isize);
st_uv = false;
Expand All @@ -455,10 +458,9 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
let line = sby * ((4 as c_int) << sb128) + 4 * sb128 + 2;
offset =
line as isize * uv_stride + (bx * 4 >> ss_hor) as isize;
bot = lr_line_buf
.as_ptr()
.add(f.lf.lr_lpf_line[pl])
.offset(offset);
bot = &*f.lf.lr_line_buf.element_as(
(f.lf.lr_lpf_line[pl] as isize + offset) as usize,
);
}
st_uv = false;
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4218,7 +4218,7 @@ pub(crate) unsafe fn rav1d_decode_frame_init(
alloc_sz += y_stride.unsigned_abs() * num_lines as usize;
alloc_sz += uv_stride.unsigned_abs() * num_lines as usize * 2;
// TODO: Fallible allocation
f.lf.lr_line_buf.try_write().unwrap().resize(alloc_sz, 0);
f.lf.lr_line_buf.resize(alloc_sz, 0);

let y_stride_px = bpc.pxstride(y_stride);
let uv_stride_px = bpc.pxstride(uv_stride);
Expand Down
5 changes: 2 additions & 3 deletions src/internal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,6 @@ use std::sync::Arc;
use std::sync::Condvar;
use std::sync::Mutex;
use std::sync::OnceLock;
use std::sync::RwLock;
use std::thread::JoinHandle;

#[repr(C)]
Expand Down Expand Up @@ -629,8 +628,8 @@ pub struct Rav1dFrameContext_lf {
pub last_sharpness: c_int,
pub lvl: [[[[u8; 2]; 8]; 4]; 8], /* [8 seg_id][4 dir][8 ref][2 is_gmv] */
pub tx_lpf_right_edge: TxLpfRightEdge,
pub cdef_line_buf: DisjointMut<AlignedVec32<u8>>, /* AlignedVec32<DynPixel> */
pub lr_line_buf: RwLock<AlignedVec64<u8>>,
pub cdef_line_buf: DisjointMut<AlignedVec64<u8>>, /* AlignedVec64<DynPixel> */
pub lr_line_buf: DisjointMut<AlignedVec64<u8>>,
pub cdef_line: [[usize; 3]; 2], /* [2 pre/post][3 plane] */
pub cdef_lpf_line: [usize; 3], /* plane */
pub lr_lpf_line: [usize; 3], /* plane */
Expand Down
98 changes: 54 additions & 44 deletions src/lf_apply.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
use crate::include::common::bitdepth::BitDepth;
use crate::include::common::bitdepth::DynPixel;
use crate::include::dav1d::headers::Rav1dFrameHeader;
use crate::include::dav1d::headers::Rav1dPixelLayout;
use crate::src::align::AlignedVec64;
use crate::src::disjoint_mut::DisjointMut;
use crate::src::internal::Rav1dContext;
use crate::src::internal::Rav1dDSPContext;
use crate::src::internal::Rav1dFrameData;
Expand All @@ -22,8 +25,8 @@ use std::sync::atomic::Ordering;
// stripe with the top of the next super block row.
unsafe fn backup_lpf<BD: BitDepth>(
c: &Rav1dContext,
dst: &mut [BD::Pixel],
mut dst_offset: usize,
dst: &DisjointMut<AlignedVec64<u8>>,
mut dst_offset: usize, // in pixel units
dst_stride: ptrdiff_t,
src: &[BD::Pixel],
mut src_offset: usize,
Expand Down Expand Up @@ -59,19 +62,23 @@ unsafe fn backup_lpf<BD: BitDepth>(
let top_size = top * px_abs_stride;
// Copy the top part of the stored loop filtered pixels from the
// previous sb row needed above the first stripe of this sb row.
let (dst, dst_top) = if dst_stride < 0 {
let dst = &mut dst[dst_offset - top_size - 3 * px_abs_stride..];
let (dst_top, dst) = dst.split_at_mut(top_size);
(dst, dst_top)
let (dst_idx, src_idx) = if dst_stride < 0 {
(
dst_offset - 3 * px_abs_stride,
dst_offset - top_size - 3 * px_abs_stride,
)
} else {
let dst = &mut dst[dst_offset..];
dst.split_at_mut(top_size)
(dst_offset, dst_offset + top_size)
};

for i in 0..4 {
BD::pixel_copy(
&mut dst[i * px_abs_stride..],
&dst_top[i * px_abs_stride..],
&mut dst.mut_slice_as(
dst_idx + i * px_abs_stride..dst_idx + i * px_abs_stride + dst_w as usize,
),
&dst.slice_as(
src_idx + i * px_abs_stride..src_idx + i * px_abs_stride + dst_w as usize,
),
dst_w as usize,
);
}
Expand All @@ -81,8 +88,9 @@ unsafe fn backup_lpf<BD: BitDepth>(
if lr_backup != 0 && frame_hdr.size.width[0] != frame_hdr.size.width[1] {
while row + stripe_h <= row_h {
let n_lines = 4 - (row + stripe_h + 1 == h) as c_int;
(dsp.mc.resize)(
dst.as_mut_ptr().add(dst_offset).cast(),
let mut dst_guard = dst.mut_slice_as(dst_offset..dst_offset + dst_w as usize);
((*dsp).mc.resize)(
dst_guard.as_mut_ptr() as *mut BD::Pixel as *mut DynPixel,
dst_stride,
src.as_ptr().add(src_offset).cast(),
src_stride,
Expand All @@ -102,13 +110,16 @@ unsafe fn backup_lpf<BD: BitDepth>(

if n_lines == 3 {
let dst_abs_px_stride = BD::pxstride(dst_stride.unsigned_abs());
let (src_tmp, dst_tmp) = if dst_stride < 0 {
let (dst_tmp, src_tmp) = dst[dst_offset..].split_at_mut(dst_abs_px_stride);
(src_tmp, dst_tmp)
let (src_idx, dst_idx) = if dst_stride < 0 {
(dst_offset + dst_abs_px_stride, dst_offset)
} else {
dst[dst_offset - dst_abs_px_stride..].split_at_mut(dst_abs_px_stride)
(dst_offset - dst_abs_px_stride, dst_offset)
};
BD::pixel_copy(dst_tmp, src_tmp, dst_w as usize);
BD::pixel_copy(
&mut dst.mut_slice_as(dst_idx..dst_idx + dst_w as usize),
&dst.slice_as(src_idx..src_idx + dst_w as usize),
dst_w as usize,
);
dst_offset = (dst_offset as isize + BD::pxstride(dst_stride)) as usize;
}
}
Expand All @@ -117,17 +128,24 @@ unsafe fn backup_lpf<BD: BitDepth>(
let n_lines = 4 - (row + stripe_h + 1 == h) as c_int;
for i in 0..4 {
let dst_abs_px_stride = BD::pxstride(dst_stride.unsigned_abs());
let (src_tmp, dst_tmp) = if i != n_lines {
(&src[src_offset..], &mut dst[dst_offset..])
} else if dst_stride < 0 {
let (dst_tmp, src_tmp) = dst[dst_offset..].split_at_mut(dst_abs_px_stride);
(&*src_tmp, dst_tmp)
if i != n_lines {
BD::pixel_copy(
&mut dst.mut_slice_as(dst_offset..dst_offset + src_w as usize),
&src[src_offset..],
src_w as usize,
);
} else {
let (src_tmp, dst_tmp) =
dst[dst_offset - dst_abs_px_stride..].split_at_mut(dst_abs_px_stride);
(&*src_tmp, dst_tmp)
};
BD::pixel_copy(dst_tmp, src_tmp, src_w as usize);
let (src_idx, dst_idx) = if dst_stride < 0 {
(dst_offset + dst_abs_px_stride, dst_offset)
} else {
(dst_offset - dst_abs_px_stride, dst_offset)
};
BD::pixel_copy(
&mut dst.mut_slice_as(dst_idx..dst_idx + src_w as usize),
&dst.slice_as(src_idx..src_idx + src_w as usize),
src_w as usize,
)
}
dst_offset = (dst_offset as isize + BD::pxstride(dst_stride)) as usize;
src_offset = (src_offset as isize + BD::pxstride(src_stride)) as usize;
}
Expand Down Expand Up @@ -171,8 +189,6 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
// TODO Also check block level restore type to reduce copying.
let restore_planes = f.lf.restore_planes;

let mut lr_line_buf_lock = f.lf.lr_line_buf.write().unwrap();
let lr_line_buf = BD::cast_pixel_slice_mut(&mut lr_line_buf_lock);
if seq_hdr.cdef != 0 || restore_planes & LR_RESTORE_Y as c_int != 0 {
let h = f.cur.p.h;
let w = f.bw << 2;
Expand All @@ -181,7 +197,7 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
if restore_planes & LR_RESTORE_Y as c_int != 0 || resize == 0 {
backup_lpf::<BD>(
c,
lr_line_buf,
&f.lf.lr_line_buf,
dst_offset[0],
lr_stride[0],
src[0],
Expand Down Expand Up @@ -209,10 +225,8 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
let cdef_line_start = (f.lf.cdef_lpf_line[0] as isize + cmp::min(y_span, 0)) as usize;
backup_lpf::<BD>(
c,
&mut f.lf.cdef_line_buf.mut_slice_as(
cdef_line_start..cdef_line_start + cdef_plane_y_sz.unsigned_abs(),
),
(cdef_off_y - cmp::min(y_span, 0)) as usize,
&f.lf.cdef_line_buf,
cdef_line_start + (cdef_off_y - cmp::min(y_span, 0)) as usize,
src_stride[0],
src[0],
(src_offset[0] as isize - offset as isize * src_y_stride as isize) as usize,
Expand Down Expand Up @@ -248,7 +262,7 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
if restore_planes & LR_RESTORE_U as c_int != 0 || resize == 0 {
backup_lpf::<BD>(
c,
lr_line_buf,
&f.lf.lr_line_buf,
dst_offset[1],
lr_stride[1],
src[1],
Expand Down Expand Up @@ -276,10 +290,8 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
(f.lf.cdef_lpf_line[1] as isize + cmp::min(uv_span, 0)) as usize;
backup_lpf::<BD>(
c,
&mut f.lf.cdef_line_buf.mut_slice_as(
cdef_line_start..cdef_line_start + cdef_plane_uv_sz.unsigned_abs(),
),
(cdef_off_uv - cmp::min(uv_span, 0)) as usize,
&f.lf.cdef_line_buf,
cdef_line_start + (cdef_off_uv - cmp::min(uv_span, 0)) as usize,
src_stride[1],
src[1],
(src_offset[1] as isize - offset_uv as isize * src_uv_stride) as usize,
Expand All @@ -304,7 +316,7 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
if restore_planes & LR_RESTORE_V as c_int != 0 || resize == 0 {
backup_lpf::<BD>(
c,
lr_line_buf,
&f.lf.lr_line_buf,
dst_offset[2],
lr_stride[1],
src[2],
Expand Down Expand Up @@ -332,10 +344,8 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
(f.lf.cdef_lpf_line[2] as isize + cmp::min(uv_span, 0)) as usize;
backup_lpf::<BD>(
c,
&mut f.lf.cdef_line_buf.mut_slice_as(
cdef_line_start..cdef_line_start + cdef_plane_uv_sz.unsigned_abs(),
),
(cdef_off_uv - cmp::min(uv_span, 0)) as usize,
&f.lf.cdef_line_buf,
cdef_line_start + (cdef_off_uv - cmp::min(uv_span, 0)) as usize,
src_stride[1],
src[2],
(src_offset[1] as isize - offset_uv as isize * src_uv_stride) as usize,
Expand Down
6 changes: 3 additions & 3 deletions src/lr_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ unsafe fn lr_stripe<BD: BitDepth>(
let sby = y + (if y != 0 { 8 << ss_ver } else { 0 }) >> 6 - ss_ver + seq_hdr.sb128;
let have_tt = (c.tc.len() > 1) as c_int;
let lpf_stride = BD::pxstride(stride);
let lr_line_buf_lock = f.lf.lr_line_buf.read().unwrap();
let lr_line_buf = BD::cast_pixel_slice(&lr_line_buf_lock);
let mut lpf_offset = f.lf.lr_lpf_line[plane as usize] as isize;
lpf_offset += (have_tt * (sby * (4 << seq_hdr.sb128) - 4)) as isize * lpf_stride + x as isize;
// The first stripe of the frame is shorter by 8 luma pixel rows.
Expand Down Expand Up @@ -101,7 +99,9 @@ unsafe fn lr_stripe<BD: BitDepth>(
// `lr_line_buf`, so we must use `.wrapping_offset` here.
// `.wrapping_offset` is needed since `.offset` requires the pointer to be in bounds,
// which `.wrapping_offset` does not; it defers that requirement to when the pointer is dereferenced.
lr_line_buf.as_ptr().wrapping_offset(lpf_offset).cast(),
(f.lf.lr_line_buf.as_mut_ptr() as *const BD::Pixel)
.wrapping_offset(lpf_offset)
.cast(),
unit_w,
stripe_h,
&mut params,
Expand Down

0 comments on commit 4245a60

Please sign in to comment.