From 253eaee62dfc48e81eb75560ee6ddc97d87d369c Mon Sep 17 00:00:00 2001 From: Folkert Date: Sat, 3 Feb 2024 15:39:34 +0100 Subject: [PATCH 01/13] make Rav1dTaskContext::f a *mut pointer --- src/decode.rs | 2 +- src/internal.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/decode.rs b/src/decode.rs index ee1b9c076..ec1000cbf 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -1452,7 +1452,7 @@ unsafe fn decode_b( } let ts = &mut *t.ts; - let f = &*t.f; + let f = &mut *t.f; let frame_hdr = &***f.frame_hdr.as_ref().unwrap(); let mut b_mem = Default::default(); let b = if t.frame_thread.pass != 0 { diff --git a/src/internal.rs b/src/internal.rs index 35d064a14..833a9609a 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -665,7 +665,7 @@ pub(crate) struct Rav1dTaskContext_task_thread { #[repr(C)] pub(crate) struct Rav1dTaskContext { - pub f: *const Rav1dFrameContext, + pub f: *mut Rav1dFrameContext, pub ts: *mut Rav1dTileState, pub bx: c_int, pub by: c_int, From c9d1d3b8373db8de013652dc59c5464d17d2bb07 Mon Sep 17 00:00:00 2001 From: Folkert Date: Fri, 2 Feb 2024 16:39:23 +0100 Subject: [PATCH 02/13] make Rav1dFrameContext_lf::level a boxed slice --- src/decode.rs | 11 +++++------ src/internal.rs | 2 +- src/lf_apply.rs | 18 ++++++++++++------ src/lf_mask.rs | 22 ++-------------------- src/lib.rs | 2 +- 5 files changed, 21 insertions(+), 34 deletions(-) diff --git a/src/decode.rs b/src/decode.rs index ec1000cbf..cd18c25a0 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -2161,7 +2161,7 @@ unsafe fn decode_b( if frame_hdr.loopfilter.level_y != [0, 0] { rav1d_create_lf_mask_intra( &mut *t.lf_mask, - f.lf.level, + &mut f.lf.level, f.b4_stride, &*ts.lflvl.offset(b.seg_id as isize), t.bx, @@ -3151,7 +3151,7 @@ unsafe fn decode_b( } rav1d_create_lf_mask_inter( &mut *t.lf_mask, - f.lf.level, + &mut f.lf.level, f.b4_stride, // In C, the inner dimensions (`ref`, `is_gmv`) are offset, // but then cast back to a pointer to the full array, @@ 
-4515,14 +4515,13 @@ pub(crate) unsafe fn rav1d_decode_frame_init( // update allocation for loopfilter masks if num_sb128 != f.lf.mask_sz { freep(&mut f.lf.mask as *mut *mut Av1Filter as *mut c_void); - freep(&mut f.lf.level as *mut *mut [u8; 4] as *mut c_void); + let _ = std::mem::take(&mut f.lf.level); f.lf.mask = malloc(::core::mem::size_of::() * num_sb128 as usize) as *mut Av1Filter; // over-allocate by 3 bytes since some of the SIMD implementations // index this from the level type and can thus over-read by up to 3 - f.lf.level = malloc(::core::mem::size_of::<[u8; 4]>() * num_sb128 as usize * 32 * 32 + 3) - as *mut [u8; 4]; - if f.lf.mask.is_null() || f.lf.level.is_null() { + f.lf.level = vec![[0u8; 4]; num_sb128 as usize * 32 * 32 + 3].into(); + if f.lf.mask.is_null() { f.lf.mask_sz = 0; return Err(ENOMEM); } diff --git a/src/internal.rs b/src/internal.rs index 833a9609a..e6e826989 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -382,7 +382,7 @@ pub struct Rav1dFrameContext_frame_thread { /// loopfilter #[repr(C)] pub struct Rav1dFrameContext_lf { - pub level: *mut [u8; 4], + pub level: Box<[[u8; 4]]>, pub mask: *mut Av1Filter, pub lr_mask: *mut Av1Restoration, pub mask_sz: c_int, /* w*h */ diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 43ab33d83..24e469458 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -697,8 +697,9 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( } } let mut ptr: *mut BD::Pixel; - let mut level_ptr: *mut [u8; 4] = - ((*f).lf.level).offset((*f).b4_stride * sby as isize * sbsz as isize); + let mut level_ptr: *const [u8; 4] = ((*f).lf.level) + .as_ptr() + .offset((*f).b4_stride * sby as isize * sbsz as isize); ptr = *p.offset(0); have_left = 0 as c_int; x = 0 as c_int; @@ -724,7 +725,9 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( return; } let mut uv_off: ptrdiff_t; - level_ptr = ((*f).lf.level).offset((*f).b4_stride * (sby * sbsz >> ss_ver) as isize); + level_ptr = ((*f).lf.level) + .as_ptr() + 
.offset((*f).b4_stride * (sby * sbsz >> ss_ver) as isize); uv_off = 0 as c_int as ptrdiff_t; have_left = 0 as c_int; x = 0 as c_int; @@ -769,8 +772,9 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( let endy4: c_uint = (starty4 + cmp::min((*f).h4 - sby * sbsz, sbsz)) as c_uint; let uv_endy4: c_uint = endy4.wrapping_add(ss_ver as c_uint) >> ss_ver; let mut ptr: *mut BD::Pixel; - let mut level_ptr: *mut [u8; 4] = - ((*f).lf.level).offset((*f).b4_stride * sby as isize * sbsz as isize); + let mut level_ptr: *const [u8; 4] = ((*f).lf.level) + .as_ptr() + .offset((*f).b4_stride * sby as isize * sbsz as isize); ptr = *p.offset(0); x = 0 as c_int; while x < (*f).sb128w { @@ -795,7 +799,9 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( return; } let mut uv_off: ptrdiff_t; - level_ptr = ((*f).lf.level).offset((*f).b4_stride * (sby * sbsz >> ss_ver) as isize); + level_ptr = ((*f).lf.level) + .as_ptr() + .offset((*f).b4_stride * (sby * sbsz >> ss_ver) as isize); uv_off = 0 as c_int as ptrdiff_t; x = 0 as c_int; while x < (*f).sb128w { diff --git a/src/lf_mask.rs b/src/lf_mask.rs index 99c099dd6..77d9af515 100644 --- a/src/lf_mask.rs +++ b/src/lf_mask.rs @@ -355,7 +355,7 @@ fn mask_edges_chroma( pub(crate) unsafe fn rav1d_create_lf_mask_intra( lflvl: &mut Av1Filter, - level_cache: *mut [u8; 4], + level_cache: &mut [[u8; 4]], b4_stride: ptrdiff_t, filter_level: &[[[u8; 2]; 8]; 4], bx: c_int, @@ -381,10 +381,6 @@ pub(crate) unsafe fn rav1d_create_lf_mask_intra( let by4 = by & 31; if bw4 != 0 && bh4 != 0 { - // TODO: Remove when `level_cache` is already a slice coming from a `Vec`. - // That refactor is complex, though, so for now we make it a slice by how elements are accessed. 
- let level_cache_len = (by * b4_stride + bx) + ((bh4 - 1) * b4_stride + bw4); - let level_cache = std::slice::from_raw_parts_mut(level_cache, level_cache_len); let offset = by * b4_stride + bx; for y in 0..bh4 { let offset = offset + y * b4_stride; @@ -420,11 +416,6 @@ pub(crate) unsafe fn rav1d_create_lf_mask_intra( let cbx4 = bx4 >> ss_hor; let cby4 = by4 >> ss_ver; - // TODO: Remove when `level_cache` is already a slice coming from a `Vec`. - // That refactor is complex, though, so for now we make it a slice by how elements are accessed. - let level_cache_len = - ((by >> ss_ver) * b4_stride + (bx >> ss_hor)) + ((cbh4 - 1) * b4_stride + cbw4); - let level_cache = std::slice::from_raw_parts_mut(level_cache, level_cache_len); let offset = (by >> ss_ver) * b4_stride + (bx >> ss_hor); for y in 0..cbh4 { let offset = offset + y * b4_stride; @@ -451,7 +442,7 @@ pub(crate) unsafe fn rav1d_create_lf_mask_intra( pub(crate) unsafe fn rav1d_create_lf_mask_inter( lflvl: &mut Av1Filter, - level_cache: *mut [u8; 4], + level_cache: &mut [[u8; 4]], b4_stride: ptrdiff_t, filter_level: &[[[u8; 2]; 8]; 4], r#ref: usize, @@ -482,10 +473,6 @@ pub(crate) unsafe fn rav1d_create_lf_mask_inter( let by4 = by & 31; if bw4 != 0 && bh4 != 0 { - // TODO: Remove when `level_cache` is already a slice coming from a `Vec`. - // That refactor is complex, though, so for now we make it a slice by how elements are accessed. - let level_cache_len = (by * b4_stride + bx) + ((bh4 - 1) * b4_stride + bw4); - let level_cache = std::slice::from_raw_parts_mut(level_cache, level_cache_len); let offset = by * b4_stride + bx; for y in 0..bh4 { let offset = offset + y * b4_stride; @@ -532,11 +519,6 @@ pub(crate) unsafe fn rav1d_create_lf_mask_inter( let cbx4 = bx4 >> ss_hor; let cby4 = by4 >> ss_ver; - // TODO: Remove when `level_cache` is already a slice coming from a `Vec`. - // That refactor is complex, though, so for now we make it a slice by how elements are accessed. 
- let level_cache_len = - ((by >> ss_ver) * b4_stride + (bx >> ss_hor)) + ((cbh4 - 1) * b4_stride + cbw4); - let level_cache = std::slice::from_raw_parts_mut(level_cache, level_cache_len); let offset = (by >> ss_ver) * b4_stride + (bx >> ss_hor); for y in 0..cbh4 { let offset = offset + y * b4_stride; diff --git a/src/lib.rs b/src/lib.rs index f475ea69a..b5bb00f8c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -987,7 +987,7 @@ impl Drop for Rav1dContext { let _ = mem::take(&mut (*f).tiles); free((*f).lf.mask as *mut c_void); free((*f).lf.lr_mask as *mut c_void); - free((*f).lf.level as *mut c_void); + let _ = mem::take(&mut (*f).lf.level); free((*f).lf.tx_lpf_right_edge[0] as *mut c_void); free((*f).lf.start_of_tile_row as *mut c_void); rav1d_refmvs_clear(&mut (*f).rf); From d6979453a27fab99bb075a0a70ec097756244156 Mon Sep 17 00:00:00 2001 From: Folkert Date: Fri, 2 Feb 2024 17:55:15 +0100 Subject: [PATCH 03/13] simplify loops --- src/lf_apply.rs | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 24e469458..393138d82 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -703,11 +703,15 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( ptr = *p.offset(0); have_left = 0 as c_int; x = 0 as c_int; - while x < (*f).sb128w { + let mut level_ptr: &[[u8; 4]] = + &(*f).lf.level[((*f).b4_stride * sby as isize * sbsz as isize) as usize..]; + ptr = *p.offset(0); + have_left = 0 as c_int; + for (x, level_ptr) in (0..(*f).sb128w).zip(level_ptr.chunks(32)) { filter_plane_cols_y::( f, have_left, - level_ptr as *const [u8; 4], + level_ptr.as_ptr(), (*f).b4_stride, ((*lflvl.offset(x as isize)).filter_y[0]).as_mut_ptr() as *const [[u16; 2]; 3], ptr, @@ -716,26 +720,21 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( starty4, endy4 as c_int, ); - x += 1; have_left = 1 as c_int; ptr = ptr.offset(128); - level_ptr = level_ptr.offset(32); } if frame_hdr.loopfilter.level_u == 0 && 
frame_hdr.loopfilter.level_v == 0 { return; } let mut uv_off: ptrdiff_t; - level_ptr = ((*f).lf.level) - .as_ptr() - .offset((*f).b4_stride * (sby * sbsz >> ss_ver) as isize); + level_ptr = &((*f).lf.level)[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; uv_off = 0 as c_int as ptrdiff_t; have_left = 0 as c_int; - x = 0 as c_int; - while x < (*f).sb128w { + for (x, level_ptr) in (0..(*f).sb128w).zip(level_ptr.chunks((32 >> ss_hor) as usize)) { filter_plane_cols_uv::( f, have_left, - level_ptr as *const [u8; 4], + level_ptr.as_ptr(), (*f).b4_stride, ((*lflvl.offset(x as isize)).filter_uv[0]).as_mut_ptr() as *const [[u16; 2]; 2], &mut *(*p.offset(1)).offset(uv_off as isize), @@ -746,10 +745,8 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( uv_endy4 as c_int, ss_ver, ); - x += 1; have_left = 1 as c_int; uv_off += 128 >> ss_hor; - level_ptr = level_ptr.offset((32 >> ss_hor) as isize); } } From 92bcd6548ffa4a79d3b23c2e5109c315466e1212 Mon Sep 17 00:00:00 2001 From: Folkert Date: Fri, 2 Feb 2024 17:57:03 +0100 Subject: [PATCH 04/13] refactor filter_plane_cols_uv --- src/lf_apply.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 393138d82..c60a883ea 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -436,7 +436,7 @@ unsafe fn filter_plane_rows_y( unsafe fn filter_plane_cols_uv( f: *const Rav1dFrameContext, have_left: c_int, - lvl: *const [u8; 4], + lvl: &[[u8; 4]], b4_stride: ptrdiff_t, mask: *const [[u16; 2]; 2], u: *mut BD::Pixel, @@ -468,7 +468,7 @@ unsafe fn filter_plane_cols_uv( u.offset((x * 4) as isize).cast(), ls, hmask.as_mut_ptr(), - &*(*lvl.offset(x as isize)).as_ptr().offset(2) as *const u8 as *const [u8; 4], + &*lvl[x as usize].as_ptr().offset(2) as *const u8 as *const [u8; 4], b4_stride, &(*f).lf.lim_lut.0, endy4 - starty4, @@ -478,7 +478,7 @@ unsafe fn filter_plane_cols_uv( v.offset((x * 4) as isize).cast(), ls, hmask.as_mut_ptr(), - &*(*lvl.offset(x as 
isize)).as_ptr().offset(3) as *const u8 as *const [u8; 4], + &*lvl[x as usize].as_ptr().offset(3) as *const u8 as *const [u8; 4], b4_stride, &(*f).lf.lim_lut.0, endy4 - starty4, @@ -734,7 +734,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( filter_plane_cols_uv::( f, have_left, - level_ptr.as_ptr(), + level_ptr, (*f).b4_stride, ((*lflvl.offset(x as isize)).filter_uv[0]).as_mut_ptr() as *const [[u16; 2]; 2], &mut *(*p.offset(1)).offset(uv_off as isize), From f8c9cf6dc12d1e3626e6e4d276d50d29762cb7dd Mon Sep 17 00:00:00 2001 From: Folkert Date: Fri, 2 Feb 2024 18:00:15 +0100 Subject: [PATCH 05/13] refactor filter_plane_cols_y --- src/lf_apply.rs | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index c60a883ea..044145c50 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -345,7 +345,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( unsafe fn filter_plane_cols_y( f: *const Rav1dFrameContext, have_left: c_int, - lvl: *const [u8; 4], + lvl: &[[u8; 4]], b4_stride: ptrdiff_t, mask: *const [[u16; 2]; 3], dst: *mut BD::Pixel, @@ -355,8 +355,7 @@ unsafe fn filter_plane_cols_y( endy4: c_int, ) { let dsp: *const Rav1dDSPContext = (*f).dsp; - let mut x = 0; - while x < w { + for x in 0..w { if !(have_left == 0 && x == 0) { let mut hmask: [u32; 4] = [0; 4]; if starty4 == 0 { @@ -378,14 +377,14 @@ unsafe fn filter_plane_cols_y( dst.offset((x * 4) as isize).cast(), ls, hmask.as_mut_ptr(), - &*(*lvl.offset(x as isize)).as_ptr().offset(0) as *const u8 as *const [u8; 4], + &*(*lvl.as_ptr().offset(x as isize)).as_ptr().offset(0) as *const u8 + as *const [u8; 4], b4_stride, &(*f).lf.lim_lut.0, endy4 - starty4, (*f).bitdepth_max, ); } - x += 1; } } @@ -448,8 +447,7 @@ unsafe fn filter_plane_cols_uv( ss_ver: c_int, ) { let dsp: *const Rav1dDSPContext = (*f).dsp; - let mut x = 0; - while x < w { + for x in 0..w { if !(have_left == 0 && x == 0) { let mut hmask: [u32; 3] = [0; 3]; if starty4 == 0 { @@ -485,7 +483,6 @@ 
unsafe fn filter_plane_cols_uv( (*f).bitdepth_max, ); } - x += 1; } } @@ -697,12 +694,6 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( } } let mut ptr: *mut BD::Pixel; - let mut level_ptr: *const [u8; 4] = ((*f).lf.level) - .as_ptr() - .offset((*f).b4_stride * sby as isize * sbsz as isize); - ptr = *p.offset(0); - have_left = 0 as c_int; - x = 0 as c_int; let mut level_ptr: &[[u8; 4]] = &(*f).lf.level[((*f).b4_stride * sby as isize * sbsz as isize) as usize..]; ptr = *p.offset(0); @@ -711,7 +702,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( filter_plane_cols_y::( f, have_left, - level_ptr.as_ptr(), + level_ptr, (*f).b4_stride, ((*lflvl.offset(x as isize)).filter_y[0]).as_mut_ptr() as *const [[u16; 2]; 3], ptr, From 37b52decef3d56812041431310602294c62732f2 Mon Sep 17 00:00:00 2001 From: Folkert Date: Fri, 2 Feb 2024 18:04:17 +0100 Subject: [PATCH 06/13] simplify loops in rav1d_loopfilter_sbrow_rows --- src/lf_apply.rs | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 044145c50..b8def3550 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -747,7 +747,6 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( lflvl: *mut Av1Filter, sby: c_int, ) { - let mut x; let have_top = (sby > 0) as c_int; let seq_hdr = &***(*f).seq_hdr.as_ref().unwrap(); let is_sb64 = (seq_hdr.sb128 == 0) as c_int; @@ -760,16 +759,14 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( let endy4: c_uint = (starty4 + cmp::min((*f).h4 - sby * sbsz, sbsz)) as c_uint; let uv_endy4: c_uint = endy4.wrapping_add(ss_ver as c_uint) >> ss_ver; let mut ptr: *mut BD::Pixel; - let mut level_ptr: *const [u8; 4] = ((*f).lf.level) - .as_ptr() - .offset((*f).b4_stride * sby as isize * sbsz as isize); + let mut level_ptr: &[[u8; 4]] = + &((*f).lf.level)[((*f).b4_stride * sby as isize * sbsz as isize) as usize..]; ptr = *p.offset(0); - x = 0 as c_int; - while x < (*f).sb128w { + for (x, level_ptr) in 
(0..(*f).sb128w).zip(level_ptr.chunks(32)) { filter_plane_rows_y::( f, have_top, - level_ptr as *const [u8; 4], + level_ptr.as_ptr(), (*f).b4_stride, ((*lflvl.offset(x as isize)).filter_y[1]).as_mut_ptr() as *const [[u16; 2]; 3], ptr, @@ -778,25 +775,20 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( starty4, endy4 as c_int, ); - x += 1; ptr = ptr.offset(128); - level_ptr = level_ptr.offset(32); } let frame_hdr = &***(*f).frame_hdr.as_ref().unwrap(); if frame_hdr.loopfilter.level_u == 0 && frame_hdr.loopfilter.level_v == 0 { return; } let mut uv_off: ptrdiff_t; - level_ptr = ((*f).lf.level) - .as_ptr() - .offset((*f).b4_stride * (sby * sbsz >> ss_ver) as isize); + level_ptr = &((*f).lf.level)[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; uv_off = 0 as c_int as ptrdiff_t; - x = 0 as c_int; - while x < (*f).sb128w { + for (x, level_ptr) in (0..(*f).sb128w).zip(level_ptr.chunks((32 >> ss_hor) as usize)) { filter_plane_rows_uv::( f, have_top, - level_ptr as *const [u8; 4], + level_ptr.as_ptr(), (*f).b4_stride, ((*lflvl.offset(x as isize)).filter_uv[1]).as_mut_ptr() as *const [[u16; 2]; 2], &mut *(*p.offset(1)).offset(uv_off as isize), @@ -807,8 +799,6 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( uv_endy4 as c_int, ss_hor, ); - x += 1; uv_off += 128 >> ss_hor; - level_ptr = level_ptr.offset((32 >> ss_hor) as isize); } } From c418f85ab9756141ea36695f2d1f599528f847e7 Mon Sep 17 00:00:00 2001 From: Folkert Date: Fri, 2 Feb 2024 18:24:17 +0100 Subject: [PATCH 07/13] cleanup --- src/lf_apply.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index b8def3550..37ea4a5ab 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -392,7 +392,7 @@ unsafe fn filter_plane_cols_y( unsafe fn filter_plane_rows_y( f: *const Rav1dFrameContext, have_top: c_int, - mut lvl: *const [u8; 4], + lvl: &[[u8; 4]], b4_stride: ptrdiff_t, mask: *const [[u16; 2]; 3], mut dst: *mut BD::Pixel, @@ -401,6 +401,7 
@@ unsafe fn filter_plane_rows_y( starty4: c_int, endy4: c_int, ) { + let mut lvl = lvl.as_ptr(); let dsp: *const Rav1dDSPContext = (*f).dsp; let mut y = starty4; while y < endy4 { @@ -490,7 +491,7 @@ unsafe fn filter_plane_cols_uv( unsafe fn filter_plane_rows_uv( f: *const Rav1dFrameContext, have_top: c_int, - mut lvl: *const [u8; 4], + lvl: &[[u8; 4]], b4_stride: ptrdiff_t, mask: *const [[u16; 2]; 2], u: *mut BD::Pixel, @@ -501,10 +502,10 @@ unsafe fn filter_plane_rows_uv( endy4: c_int, ss_hor: c_int, ) { + let mut lvl = lvl.as_ptr(); let dsp: *const Rav1dDSPContext = (*f).dsp; let mut off_l: ptrdiff_t = 0 as c_int as ptrdiff_t; - let mut y = starty4; - while y < endy4 { + for y in starty4..endy4 { if !(have_top == 0 && y == 0) { let vmask: [u32; 3] = [ (*mask.offset(y as isize))[0][0] as c_uint @@ -534,7 +535,6 @@ unsafe fn filter_plane_rows_uv( (*f).bitdepth_max, ); } - y += 1; off_l += 4 * BD::pxstride(ls as usize) as isize; lvl = lvl.offset(b4_stride as isize); } @@ -766,7 +766,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( filter_plane_rows_y::( f, have_top, - level_ptr.as_ptr(), + level_ptr, (*f).b4_stride, ((*lflvl.offset(x as isize)).filter_y[1]).as_mut_ptr() as *const [[u16; 2]; 3], ptr, @@ -788,7 +788,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( filter_plane_rows_uv::( f, have_top, - level_ptr.as_ptr(), + level_ptr, (*f).b4_stride, ((*lflvl.offset(x as isize)).filter_uv[1]).as_mut_ptr() as *const [[u16; 2]; 2], &mut *(*p.offset(1)).offset(uv_off as isize), From 18451f99839f410e2eca66dbd0daac14eb853289 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sat, 3 Feb 2024 00:37:24 +0100 Subject: [PATCH 08/13] use a for loop in the plane_rows functions --- src/lf_apply.rs | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 37ea4a5ab..4741f5f77 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -401,10 +401,8 @@ unsafe fn filter_plane_rows_y( starty4: c_int, 
endy4: c_int, ) { - let mut lvl = lvl.as_ptr(); let dsp: *const Rav1dDSPContext = (*f).dsp; - let mut y = starty4; - while y < endy4 { + for (y, lvl) in (starty4..endy4).zip(lvl.chunks(b4_stride as usize)) { if !(have_top == 0 && y == 0) { let vmask: [u32; 4] = [ (*mask.offset(y as isize))[0][0] as c_uint @@ -419,16 +417,14 @@ unsafe fn filter_plane_rows_y( dst.cast(), ls, vmask.as_ptr(), - &*(*lvl.offset(0)).as_ptr().offset(1) as *const u8 as *const [u8; 4], + &*(*lvl.as_ptr().offset(0)).as_ptr().offset(1) as *const u8 as *const [u8; 4], b4_stride, &(*f).lf.lim_lut.0, w, (*f).bitdepth_max, ); } - y += 1; dst = dst.offset(4 * BD::pxstride(ls as usize) as isize); - lvl = lvl.offset(b4_stride as isize); } } @@ -502,10 +498,9 @@ unsafe fn filter_plane_rows_uv( endy4: c_int, ss_hor: c_int, ) { - let mut lvl = lvl.as_ptr(); let dsp: *const Rav1dDSPContext = (*f).dsp; let mut off_l: ptrdiff_t = 0 as c_int as ptrdiff_t; - for y in starty4..endy4 { + for (y, lvl) in (starty4..endy4).zip(lvl.chunks(b4_stride as usize)) { if !(have_top == 0 && y == 0) { let vmask: [u32; 3] = [ (*mask.offset(y as isize))[0][0] as c_uint @@ -518,7 +513,7 @@ unsafe fn filter_plane_rows_uv( u.offset(off_l as isize).cast(), ls, vmask.as_ptr(), - &*(*lvl.offset(0)).as_ptr().offset(2) as *const u8 as *const [u8; 4], + &*(*lvl.as_ptr().offset(0)).as_ptr().offset(2) as *const u8 as *const [u8; 4], b4_stride, &(*f).lf.lim_lut.0, w, @@ -528,7 +523,7 @@ unsafe fn filter_plane_rows_uv( v.offset(off_l as isize).cast(), ls, vmask.as_ptr(), - &*(*lvl.offset(0)).as_ptr().offset(3) as *const u8 as *const [u8; 4], + &*(*lvl.as_ptr().offset(0)).as_ptr().offset(3) as *const u8 as *const [u8; 4], b4_stride, &(*f).lf.lim_lut.0, w, @@ -536,7 +531,6 @@ unsafe fn filter_plane_rows_uv( ); } off_l += 4 * BD::pxstride(ls as usize) as isize; - lvl = lvl.offset(b4_stride as isize); } } @@ -741,6 +735,16 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( } } +fn prefixes(slice: &[T], n: usize) -> impl Iterator { + let 
mut offset = 0; + + std::iter::from_fn(move || { + let new = slice.get(offset..)?; + offset = offset.saturating_add(n); + Some(new) + }) +} + pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( f: *const Rav1dFrameContext, p: *const *mut BD::Pixel, @@ -762,7 +766,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( let mut level_ptr: &[[u8; 4]] = &((*f).lf.level)[((*f).b4_stride * sby as isize * sbsz as isize) as usize..]; ptr = *p.offset(0); - for (x, level_ptr) in (0..(*f).sb128w).zip(level_ptr.chunks(32)) { + for (x, level_ptr) in (0..(*f).sb128w).zip(prefixes(level_ptr, 32)) { filter_plane_rows_y::( f, have_top, @@ -784,7 +788,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( let mut uv_off: ptrdiff_t; level_ptr = &((*f).lf.level)[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; uv_off = 0 as c_int as ptrdiff_t; - for (x, level_ptr) in (0..(*f).sb128w).zip(level_ptr.chunks((32 >> ss_hor) as usize)) { + for (x, level_ptr) in (0..(*f).sb128w).zip(prefixes(level_ptr, (32 >> ss_hor) as usize)) { filter_plane_rows_uv::( f, have_top, From 51cdd2e229b086677cf280e8ec6d5e6290ce2975 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sat, 3 Feb 2024 00:55:25 +0100 Subject: [PATCH 09/13] simplify indexing --- src/lf_apply.rs | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 4741f5f77..8dd60b068 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -377,8 +377,7 @@ unsafe fn filter_plane_cols_y( dst.offset((x * 4) as isize).cast(), ls, hmask.as_mut_ptr(), - &*(*lvl.as_ptr().offset(x as isize)).as_ptr().offset(0) as *const u8 - as *const [u8; 4], + lvl[x as usize][0..].as_ptr() as *const [u8; 4], b4_stride, &(*f).lf.lim_lut.0, endy4 - starty4, @@ -417,7 +416,7 @@ unsafe fn filter_plane_rows_y( dst.cast(), ls, vmask.as_ptr(), - &*(*lvl.as_ptr().offset(0)).as_ptr().offset(1) as *const u8 as *const [u8; 4], + lvl[0][1..].as_ptr() as *const [u8; 4], b4_stride, &(*f).lf.lim_lut.0, 
w, @@ -463,7 +462,7 @@ unsafe fn filter_plane_cols_uv( u.offset((x * 4) as isize).cast(), ls, hmask.as_mut_ptr(), - &*lvl[x as usize].as_ptr().offset(2) as *const u8 as *const [u8; 4], + lvl[x as usize][2..].as_ptr() as *const [u8; 4], b4_stride, &(*f).lf.lim_lut.0, endy4 - starty4, @@ -473,7 +472,7 @@ unsafe fn filter_plane_cols_uv( v.offset((x * 4) as isize).cast(), ls, hmask.as_mut_ptr(), - &*lvl[x as usize].as_ptr().offset(3) as *const u8 as *const [u8; 4], + lvl[x as usize][3..].as_ptr() as *const [u8; 4], b4_stride, &(*f).lf.lim_lut.0, endy4 - starty4, @@ -513,7 +512,7 @@ unsafe fn filter_plane_rows_uv( u.offset(off_l as isize).cast(), ls, vmask.as_ptr(), - &*(*lvl.as_ptr().offset(0)).as_ptr().offset(2) as *const u8 as *const [u8; 4], + lvl[0][2..].as_ptr() as *const [u8; 4], b4_stride, &(*f).lf.lim_lut.0, w, @@ -523,7 +522,7 @@ unsafe fn filter_plane_rows_uv( v.offset(off_l as isize).cast(), ls, vmask.as_ptr(), - &*(*lvl.as_ptr().offset(0)).as_ptr().offset(3) as *const u8 as *const [u8; 4], + lvl[0][3..].as_ptr() as *const [u8; 4], b4_stride, &(*f).lf.lim_lut.0, w, @@ -735,7 +734,9 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( } } -fn prefixes(slice: &[T], n: usize) -> impl Iterator { +/// Gives the same starting positions as `chunks`, +/// but gives the whole remaining slice at that position +fn suffixes(slice: &[T], n: usize) -> impl Iterator { let mut offset = 0; std::iter::from_fn(move || { @@ -756,17 +757,15 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( let is_sb64 = (seq_hdr.sb128 == 0) as c_int; let starty4 = (sby & is_sb64) << 4; let sbsz = 32 >> is_sb64; - let ss_ver = - ((*f).cur.p.layout as c_uint == Rav1dPixelLayout::I420 as c_int as c_uint) as c_int; - let ss_hor = - ((*f).cur.p.layout as c_uint != Rav1dPixelLayout::I444 as c_int as c_uint) as c_int; + let ss_ver = ((*f).cur.p.layout == Rav1dPixelLayout::I420) as c_int; + let ss_hor = ((*f).cur.p.layout != Rav1dPixelLayout::I444) as c_int; let endy4: c_uint = (starty4 + 
cmp::min((*f).h4 - sby * sbsz, sbsz)) as c_uint; let uv_endy4: c_uint = endy4.wrapping_add(ss_ver as c_uint) >> ss_ver; let mut ptr: *mut BD::Pixel; let mut level_ptr: &[[u8; 4]] = &((*f).lf.level)[((*f).b4_stride * sby as isize * sbsz as isize) as usize..]; ptr = *p.offset(0); - for (x, level_ptr) in (0..(*f).sb128w).zip(prefixes(level_ptr, 32)) { + for (x, level_ptr) in (0..(*f).sb128w).zip(suffixes(level_ptr, 32)) { filter_plane_rows_y::( f, have_top, @@ -788,7 +787,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( let mut uv_off: ptrdiff_t; level_ptr = &((*f).lf.level)[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; uv_off = 0 as c_int as ptrdiff_t; - for (x, level_ptr) in (0..(*f).sb128w).zip(prefixes(level_ptr, (32 >> ss_hor) as usize)) { + for (x, level_ptr) in (0..(*f).sb128w).zip(suffixes(level_ptr, (32 >> ss_hor) as usize)) { filter_plane_rows_uv::( f, have_top, From 91fd0e91582f83ce449f18db5da111fa6a420abe Mon Sep 17 00:00:00 2001 From: Folkert Date: Sat, 3 Feb 2024 00:58:39 +0100 Subject: [PATCH 10/13] cleanup p parameter --- src/lf_apply.rs | 16 ++++++++-------- src/recon.rs | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 8dd60b068..02617c20c 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -535,7 +535,7 @@ unsafe fn filter_plane_rows_uv( pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( f: *const Rav1dFrameContext, - p: *const *mut BD::Pixel, + p: &[*mut BD::Pixel; 3], lflvl: *mut Av1Filter, sby: c_int, start_of_tile_row: c_int, @@ -689,7 +689,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( let mut ptr: *mut BD::Pixel; let mut level_ptr: &[[u8; 4]] = &(*f).lf.level[((*f).b4_stride * sby as isize * sbsz as isize) as usize..]; - ptr = *p.offset(0); + ptr = p[0]; have_left = 0 as c_int; for (x, level_ptr) in (0..(*f).sb128w).zip(level_ptr.chunks(32)) { filter_plane_cols_y::( @@ -721,8 +721,8 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( level_ptr, 
(*f).b4_stride, ((*lflvl.offset(x as isize)).filter_uv[0]).as_mut_ptr() as *const [[u16; 2]; 2], - &mut *(*p.offset(1)).offset(uv_off as isize), - &mut *(*p.offset(2)).offset(uv_off as isize), + &mut *p[1].offset(uv_off as isize), + &mut *p[2].offset(uv_off as isize), (*f).cur.stride[1], cmp::min(32 as c_int, (*f).w4 - x * 32) + ss_hor >> ss_hor, starty4 >> ss_ver, @@ -748,7 +748,7 @@ fn suffixes(slice: &[T], n: usize) -> impl Iterator { pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( f: *const Rav1dFrameContext, - p: *const *mut BD::Pixel, + p: &[*mut BD::Pixel; 3], lflvl: *mut Av1Filter, sby: c_int, ) { @@ -764,7 +764,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( let mut ptr: *mut BD::Pixel; let mut level_ptr: &[[u8; 4]] = &((*f).lf.level)[((*f).b4_stride * sby as isize * sbsz as isize) as usize..]; - ptr = *p.offset(0); + ptr = p[0]; for (x, level_ptr) in (0..(*f).sb128w).zip(suffixes(level_ptr, 32)) { filter_plane_rows_y::( f, @@ -794,8 +794,8 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( level_ptr, (*f).b4_stride, ((*lflvl.offset(x as isize)).filter_uv[1]).as_mut_ptr() as *const [[u16; 2]; 2], - &mut *(*p.offset(1)).offset(uv_off as isize), - &mut *(*p.offset(2)).offset(uv_off as isize), + &mut *p[1].offset(uv_off as isize), + &mut *p[2].offset(uv_off as isize), (*f).cur.stride[1], cmp::min(32 as c_int, (*f).w4 - x * 32) + ss_hor >> ss_hor, starty4 >> ss_ver, diff --git a/src/recon.rs b/src/recon.rs index b2cac4fc3..f45166102 100644 --- a/src/recon.rs +++ b/src/recon.rs @@ -4484,7 +4484,7 @@ pub(crate) unsafe fn rav1d_filter_sbrow_deblock_cols( (f.lf.mask).offset(((sby >> (seq_hdr.sb128 == 0) as c_int) * f.sb128w) as isize); rav1d_loopfilter_sbrow_cols::( f, - p.as_ptr(), + &p, mask, sby, *(f.lf.start_of_tile_row).offset(sby as isize) as c_int, @@ -4516,7 +4516,7 @@ pub(crate) unsafe fn rav1d_filter_sbrow_deblock_rows( if c.inloop_filters as c_uint & RAV1D_INLOOPFILTER_DEBLOCK as c_int as c_uint != 0 && (frame_hdr.loopfilter.level_y[0] != 0 
|| frame_hdr.loopfilter.level_y[1] != 0) { - rav1d_loopfilter_sbrow_rows::(f, p.as_ptr(), mask, sby); + rav1d_loopfilter_sbrow_rows::(f, &p, mask, sby); } if seq_hdr.cdef != 0 || f.lf.restore_planes != 0 { rav1d_copy_lpf::(c, f, p.as_ptr(), sby); From d3a4518d2d367da89c99d4956dca79eaa53cfe65 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sat, 3 Feb 2024 15:56:06 +0100 Subject: [PATCH 11/13] cleanup --- src/decode.rs | 2 +- src/lf_apply.rs | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/decode.rs b/src/decode.rs index cd18c25a0..50bd89347 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -4515,7 +4515,7 @@ pub(crate) unsafe fn rav1d_decode_frame_init( // update allocation for loopfilter masks if num_sb128 != f.lf.mask_sz { freep(&mut f.lf.mask as *mut *mut Av1Filter as *mut c_void); - let _ = std::mem::take(&mut f.lf.level); + let _ = mem::take(&mut f.lf.level); f.lf.mask = malloc(::core::mem::size_of::() * num_sb128 as usize) as *mut Av1Filter; // over-allocate by 3 bytes since some of the SIMD implementations diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 02617c20c..483caceeb 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -711,7 +711,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( return; } let mut uv_off: ptrdiff_t; - level_ptr = &((*f).lf.level)[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; + level_ptr = &(*f).lf.level[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; uv_off = 0 as c_int as ptrdiff_t; have_left = 0 as c_int; for (x, level_ptr) in (0..(*f).sb128w).zip(level_ptr.chunks((32 >> ss_hor) as usize)) { @@ -741,6 +741,9 @@ fn suffixes(slice: &[T], n: usize) -> impl Iterator { std::iter::from_fn(move || { let new = slice.get(offset..)?; + if new.is_empty() { + return None; + } offset = offset.saturating_add(n); Some(new) }) @@ -763,7 +766,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( let uv_endy4: c_uint = endy4.wrapping_add(ss_ver as c_uint) >> ss_ver; let mut ptr: 
*mut BD::Pixel; let mut level_ptr: &[[u8; 4]] = - &((*f).lf.level)[((*f).b4_stride * sby as isize * sbsz as isize) as usize..]; + &(*f).lf.level[((*f).b4_stride * sby as isize * sbsz as isize) as usize..]; ptr = p[0]; for (x, level_ptr) in (0..(*f).sb128w).zip(suffixes(level_ptr, 32)) { filter_plane_rows_y::( @@ -785,7 +788,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( return; } let mut uv_off: ptrdiff_t; - level_ptr = &((*f).lf.level)[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; + level_ptr = &(*f).lf.level[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; uv_off = 0 as c_int as ptrdiff_t; for (x, level_ptr) in (0..(*f).sb128w).zip(suffixes(level_ptr, (32 >> ss_hor) as usize)) { filter_plane_rows_uv::( From 7ac5d59a550564b627e63f03bcf4f6fd90ef0c6c Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sat, 3 Feb 2024 22:06:10 +0100 Subject: [PATCH 12/13] Apply suggestions from code review Co-authored-by: Khyber Sen --- src/decode.rs | 2 +- src/lf_apply.rs | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/decode.rs b/src/decode.rs index 50bd89347..aa47b691a 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -4520,7 +4520,7 @@ pub(crate) unsafe fn rav1d_decode_frame_init( malloc(::core::mem::size_of::() * num_sb128 as usize) as *mut Av1Filter; // over-allocate by 3 bytes since some of the SIMD implementations // index this from the level type and can thus over-read by up to 3 - f.lf.level = vec![[0u8; 4]; num_sb128 as usize * 32 * 32 + 3].into(); + f.lf.level = vec![[0u8; 4]; num_sb128 as usize * 32 * 32 + 3].into(); // TODO fallible allocation if f.lf.mask.is_null() { f.lf.mask_sz = 0; return Err(ENOMEM); diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 483caceeb..7082700c9 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -687,8 +687,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( } } let mut ptr: *mut BD::Pixel; - let mut level_ptr: &[[u8; 4]] = - 
&(*f).lf.level[((*f).b4_stride * sby as isize * sbsz as isize) as usize..]; + let level_ptr = &(*f).lf.level[((*f).b4_stride * sby as isize * sbsz as isize) as usize..]; ptr = p[0]; have_left = 0 as c_int; for (x, level_ptr) in (0..(*f).sb128w).zip(level_ptr.chunks(32)) { @@ -711,10 +710,10 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( return; } let mut uv_off: ptrdiff_t; - level_ptr = &(*f).lf.level[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; + let level_ptr = &(*f).lf.level[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; uv_off = 0 as c_int as ptrdiff_t; have_left = 0 as c_int; - for (x, level_ptr) in (0..(*f).sb128w).zip(level_ptr.chunks((32 >> ss_hor) as usize)) { + for (x, level_ptr) in (0..(*f).sb128w).zip(level_ptr.chunks(32 >> ss_hor)) { filter_plane_cols_uv::( f, have_left, @@ -765,8 +764,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( let endy4: c_uint = (starty4 + cmp::min((*f).h4 - sby * sbsz, sbsz)) as c_uint; let uv_endy4: c_uint = endy4.wrapping_add(ss_ver as c_uint) >> ss_ver; let mut ptr: *mut BD::Pixel; - let mut level_ptr: &[[u8; 4]] = - &(*f).lf.level[((*f).b4_stride * sby as isize * sbsz as isize) as usize..]; + let level_ptr = &(*f).lf.level[((*f).b4_stride * sby as isize * sbsz as isize) as usize..]; ptr = p[0]; for (x, level_ptr) in (0..(*f).sb128w).zip(suffixes(level_ptr, 32)) { filter_plane_rows_y::( @@ -788,9 +786,9 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( return; } let mut uv_off: ptrdiff_t; - level_ptr = &(*f).lf.level[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; + let level_ptr = &(*f).lf.level[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; uv_off = 0 as c_int as ptrdiff_t; - for (x, level_ptr) in (0..(*f).sb128w).zip(suffixes(level_ptr, (32 >> ss_hor) as usize)) { + for (x, level_ptr) in (0..(*f).sb128w).zip(suffixes(level_ptr, 32 >> ss_hor)) { filter_plane_rows_uv::( f, have_top, From 70807e9bdba2f22b6532485bf94a92cb75d163b4 Mon Sep 17 
00:00:00 2001 From: Folkert Date: Sat, 3 Feb 2024 22:22:07 +0100 Subject: [PATCH 13/13] suffixes -> simple slice --- src/lf_apply.rs | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 7082700c9..a5624bea8 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -711,8 +711,8 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( } let mut uv_off: ptrdiff_t; let level_ptr = &(*f).lf.level[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; - uv_off = 0 as c_int as ptrdiff_t; have_left = 0 as c_int; + uv_off = 0; for (x, level_ptr) in (0..(*f).sb128w).zip(level_ptr.chunks(32 >> ss_hor)) { filter_plane_cols_uv::( f, @@ -733,27 +733,13 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( } } -/// Gives the same starting positions as `chunks`, -/// but gives the whole remaining slice at that position -fn suffixes(slice: &[T], n: usize) -> impl Iterator { - let mut offset = 0; - - std::iter::from_fn(move || { - let new = slice.get(offset..)?; - if new.is_empty() { - return None; - } - offset = offset.saturating_add(n); - Some(new) - }) -} - pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( f: *const Rav1dFrameContext, p: &[*mut BD::Pixel; 3], lflvl: *mut Av1Filter, sby: c_int, ) { + // Don't filter outside the frame let have_top = (sby > 0) as c_int; let seq_hdr = &***(*f).seq_hdr.as_ref().unwrap(); let is_sb64 = (seq_hdr.sb128 == 0) as c_int; @@ -763,10 +749,11 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( let ss_hor = ((*f).cur.p.layout != Rav1dPixelLayout::I444) as c_int; let endy4: c_uint = (starty4 + cmp::min((*f).h4 - sby * sbsz, sbsz)) as c_uint; let uv_endy4: c_uint = endy4.wrapping_add(ss_ver as c_uint) >> ss_ver; + let mut ptr: *mut BD::Pixel; - let level_ptr = &(*f).lf.level[((*f).b4_stride * sby as isize * sbsz as isize) as usize..]; + let mut level_ptr = &(*f).lf.level[((*f).b4_stride * sby as isize * sbsz as isize) as usize..]; ptr = p[0]; - for (x, 
level_ptr) in (0..(*f).sb128w).zip(suffixes(level_ptr, 32)) { + for x in 0..(*f).sb128w { filter_plane_rows_y::( f, have_top, @@ -775,20 +762,24 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( ((*lflvl.offset(x as isize)).filter_y[1]).as_mut_ptr() as *const [[u16; 2]; 3], ptr, (*f).cur.stride[0], - cmp::min(32 as c_int, (*f).w4 - x * 32), + cmp::min(32, (*f).w4 - x * 32), starty4, endy4 as c_int, ); ptr = ptr.offset(128); + level_ptr = &level_ptr[32..]; } + let frame_hdr = &***(*f).frame_hdr.as_ref().unwrap(); if frame_hdr.loopfilter.level_u == 0 && frame_hdr.loopfilter.level_v == 0 { return; } + let mut uv_off: ptrdiff_t; - let level_ptr = &(*f).lf.level[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; - uv_off = 0 as c_int as ptrdiff_t; - for (x, level_ptr) in (0..(*f).sb128w).zip(suffixes(level_ptr, 32 >> ss_hor)) { + let mut level_ptr = + &(*f).lf.level[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; + uv_off = 0; + for x in 0..(*f).sb128w { filter_plane_rows_uv::( f, have_top, @@ -804,5 +795,6 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( ss_hor, ); uv_off += 128 >> ss_hor; + level_ptr = &level_ptr[32 >> ss_hor..]; } }