From 3b3adc4761d6eb543a3157dce2d86995f445ba6c Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 3 Aug 2023 01:01:18 -0700 Subject: [PATCH 01/12] `fn w_mask_rust`: Cleanup `as` casts. --- src/mc.rs | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/mc.rs b/src/mc.rs index 8567b1da4..c1857d20f 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -851,13 +851,12 @@ pub unsafe fn w_mask_rust( let mut x = 0; while x < w { let m = std::cmp::min( - 38 as libc::c_int - + ((*tmp1.offset(x as isize) as libc::c_int - - *tmp2.offset(x as isize) as libc::c_int) - .abs() - + mask_rnd - >> mask_sh), - 64 as libc::c_int, + 38 + ((*tmp1.offset(x as isize) as libc::c_int + - *tmp2.offset(x as isize) as libc::c_int) + .abs() + + mask_rnd + >> mask_sh), + 64, ); *dst.offset(x as isize) = bd.iclip_pixel( *tmp1.offset(x as isize) as libc::c_int * m @@ -868,13 +867,12 @@ pub unsafe fn w_mask_rust( if ss_hor != 0 { x += 1; let n = std::cmp::min( - 38 as libc::c_int - + ((*tmp1.offset(x as isize) as libc::c_int - - *tmp2.offset(x as isize) as libc::c_int) - .abs() - + mask_rnd - >> mask_sh), - 64 as libc::c_int, + 38 + ((*tmp1.offset(x as isize) as libc::c_int + - *tmp2.offset(x as isize) as libc::c_int) + .abs() + + mask_rnd + >> mask_sh), + 64, ); *dst.offset(x as isize) = bd.iclip_pixel( *tmp1.offset(x as isize) as libc::c_int * n From 8a047b174a68ff940b2c8496c95b57a1205a685e Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 3 Aug 2023 01:10:57 -0700 Subject: [PATCH 02/12] `fn w_mask_c`: Make `ss_{hor,ver}` args `bool`s. 
--- src/mc.rs | 14 +++++++------- src/mc_tmpl_16.rs | 4 ++-- src/mc_tmpl_8.rs | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/mc.rs b/src/mc.rs index c1857d20f..80f250a43 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -837,8 +837,8 @@ pub unsafe fn w_mask_rust( h: libc::c_int, mut mask: *mut u8, sign: libc::c_int, - ss_hor: libc::c_int, - ss_ver: libc::c_int, + ss_hor: bool, + ss_ver: bool, bd: BD, ) { let intermediate_bits = bd.get_intermediate_bits(); @@ -864,7 +864,7 @@ pub unsafe fn w_mask_rust( + rnd >> sh, ); - if ss_hor != 0 { + if ss_hor { x += 1; let n = std::cmp::min( 38 + ((*tmp1.offset(x as isize) as libc::c_int @@ -880,11 +880,11 @@ pub unsafe fn w_mask_rust( + rnd >> sh, ); - if h & ss_ver != 0 { + if h & ss_ver as libc::c_int != 0 { *mask.offset((x >> 1) as isize) = (m + n + *mask.offset((x >> 1) as isize) as libc::c_int + 2 - sign >> 2) as u8; - } else if ss_ver != 0 { + } else if ss_ver { *mask.offset((x >> 1) as isize) = (m + n) as u8; } else { *mask.offset((x >> 1) as isize) = (m + n + 1 - sign >> 1) as u8; @@ -897,8 +897,8 @@ pub unsafe fn w_mask_rust( tmp1 = tmp1.offset(w as isize); tmp2 = tmp2.offset(w as isize); dst = dst.offset(BD::pxstride(dst_stride as usize) as isize); - if ss_ver == 0 || h & 1 != 0 { - mask = mask.offset((w >> ss_hor) as isize); + if !ss_ver || h & 1 != 0 { + mask = mask.offset((w >> ss_hor as libc::c_int) as isize); } } } diff --git a/src/mc_tmpl_16.rs b/src/mc_tmpl_16.rs index 42322258e..f802202f9 100644 --- a/src/mc_tmpl_16.rs +++ b/src/mc_tmpl_16.rs @@ -3188,8 +3188,8 @@ unsafe extern "C" fn w_mask_c( h, mask, sign, - ss_hor, - ss_ver, + ss_hor != 0, + ss_ver != 0, BitDepth16::new(bitdepth_max as u16), ) } diff --git a/src/mc_tmpl_8.rs b/src/mc_tmpl_8.rs index 34a5cec5e..40f80ad67 100644 --- a/src/mc_tmpl_8.rs +++ b/src/mc_tmpl_8.rs @@ -3089,8 +3089,8 @@ unsafe extern "C" fn w_mask_c( h, mask, sign, - ss_hor, - ss_ver, + ss_hor != 0, + ss_ver != 0, BitDepth8::new(()), ) } From 
913de92c6148622730484929ffed0834a1bb6e31 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 3 Aug 2023 01:13:00 -0700 Subject: [PATCH 03/12] `fn w_mask_c`: Make `w`, `h`, `dst_stride` args `usize`s. --- src/mc.rs | 12 ++++++------ src/mc_tmpl_16.rs | 6 +++--- src/mc_tmpl_8.rs | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/mc.rs b/src/mc.rs index 80f250a43..bc1f7ddeb 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -830,11 +830,11 @@ pub unsafe fn blend_h_rust( // TODO(kkysen) temporarily `pub` until `mc` callers are deduplicated pub unsafe fn w_mask_rust( mut dst: *mut BD::Pixel, - dst_stride: libc::ptrdiff_t, + dst_stride: usize, mut tmp1: *const i16, mut tmp2: *const i16, - w: libc::c_int, - h: libc::c_int, + w: usize, + h: usize, mut mask: *mut u8, sign: libc::c_int, ss_hor: bool, @@ -880,7 +880,7 @@ pub unsafe fn w_mask_rust( + rnd >> sh, ); - if h & ss_ver as libc::c_int != 0 { + if h & ss_ver as usize != 0 { *mask.offset((x >> 1) as isize) = (m + n + *mask.offset((x >> 1) as isize) as libc::c_int + 2 - sign >> 2) as u8; @@ -896,9 +896,9 @@ pub unsafe fn w_mask_rust( } tmp1 = tmp1.offset(w as isize); tmp2 = tmp2.offset(w as isize); - dst = dst.offset(BD::pxstride(dst_stride as usize) as isize); + dst = dst.offset(BD::pxstride(dst_stride) as isize); if !ss_ver || h & 1 != 0 { - mask = mask.offset((w >> ss_hor as libc::c_int) as isize); + mask = mask.offset((w >> ss_hor as usize) as isize); } } } diff --git a/src/mc_tmpl_16.rs b/src/mc_tmpl_16.rs index f802202f9..0ae60a9c1 100644 --- a/src/mc_tmpl_16.rs +++ b/src/mc_tmpl_16.rs @@ -3181,11 +3181,11 @@ unsafe extern "C" fn w_mask_c( ) { w_mask_rust( dst, - dst_stride, + dst_stride as usize, tmp1, tmp2, - w, - h, + w as usize, + h as usize, mask, sign, ss_hor != 0, diff --git a/src/mc_tmpl_8.rs b/src/mc_tmpl_8.rs index 40f80ad67..2fd352635 100644 --- a/src/mc_tmpl_8.rs +++ b/src/mc_tmpl_8.rs @@ -3082,11 +3082,11 @@ unsafe extern "C" fn w_mask_c( ) { w_mask_rust( dst, - dst_stride, +
dst_stride as usize, tmp1, tmp2, - w, - h, + w as usize, + h as usize, mask, sign, ss_hor != 0, From 206165066327630c7bdf025229600f5c054f2f4b Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 3 Aug 2023 01:16:13 -0700 Subject: [PATCH 04/12] `fn w_mask_rust`: Make `tmp{1,2}` vars slices early. --- src/mc.rs | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/src/mc.rs b/src/mc.rs index bc1f7ddeb..6335d1d18 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -831,8 +831,8 @@ pub unsafe fn blend_h_rust( pub unsafe fn w_mask_rust( mut dst: *mut BD::Pixel, dst_stride: usize, - mut tmp1: *const i16, - mut tmp2: *const i16, + tmp1: *const i16, + tmp2: *const i16, w: usize, h: usize, mut mask: *mut u8, @@ -841,6 +841,8 @@ pub unsafe fn w_mask_rust( ss_ver: bool, bd: BD, ) { + let [mut tmp1, mut tmp2] = [tmp1, tmp2].map(|tmp| std::slice::from_raw_parts(tmp, h * w)); + let intermediate_bits = bd.get_intermediate_bits(); let bitdepth = bd.bitdepth(); let sh = intermediate_bits + 6; @@ -851,34 +853,22 @@ pub unsafe fn w_mask_rust( let mut x = 0; while x < w { let m = std::cmp::min( - 38 + ((*tmp1.offset(x as isize) as libc::c_int - - *tmp2.offset(x as isize) as libc::c_int) - .abs() - + mask_rnd + 38 + ((tmp1[x] as libc::c_int - tmp2[x] as libc::c_int).abs() + mask_rnd >> mask_sh), 64, ); *dst.offset(x as isize) = bd.iclip_pixel( - *tmp1.offset(x as isize) as libc::c_int * m - + *tmp2.offset(x as isize) as libc::c_int * (64 - m) - + rnd - >> sh, + tmp1[x] as libc::c_int * m + tmp2[x] as libc::c_int * (64 - m) + rnd >> sh, ); if ss_hor { x += 1; let n = std::cmp::min( - 38 + ((*tmp1.offset(x as isize) as libc::c_int - - *tmp2.offset(x as isize) as libc::c_int) - .abs() - + mask_rnd + 38 + ((tmp1[x] as libc::c_int - tmp2[x] as libc::c_int).abs() + mask_rnd >> mask_sh), 64, ); *dst.offset(x as isize) = bd.iclip_pixel( - *tmp1.offset(x as isize) as libc::c_int * n - + *tmp2.offset(x as isize) as libc::c_int * (64 - n) - + rnd - >> sh, + 
tmp1[x] as libc::c_int * n + tmp2[x] as libc::c_int * (64 - n) + rnd >> sh, ); if h & ss_ver as usize != 0 { *mask.offset((x >> 1) as isize) = @@ -894,8 +884,8 @@ pub unsafe fn w_mask_rust( } x += 1; } - tmp1 = tmp1.offset(w as isize); - tmp2 = tmp2.offset(w as isize); + tmp1 = &tmp1[w..]; + tmp2 = &tmp2[w..]; dst = dst.offset(BD::pxstride(dst_stride) as isize); if !ss_ver || h & 1 != 0 { mask = mask.offset((w >> ss_hor as usize) as isize); From 8bf099b564a6611d1562b07ea73a15748fcdc0dd Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 3 Aug 2023 01:20:53 -0700 Subject: [PATCH 05/12] `fn w_mask_rust`: Make `dst` var a slice early. --- src/mc.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/mc.rs b/src/mc.rs index 6335d1d18..5255e9778 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -829,7 +829,7 @@ pub unsafe fn blend_h_rust( // TODO(kkysen) temporarily `pub` until `mc` callers are deduplicated pub unsafe fn w_mask_rust( - mut dst: *mut BD::Pixel, + dst: *mut BD::Pixel, dst_stride: usize, tmp1: *const i16, tmp2: *const i16, @@ -841,6 +841,8 @@ pub unsafe fn w_mask_rust( ss_ver: bool, bd: BD, ) { + let dst_stride = BD::pxstride(dst_stride); + let mut dst = std::slice::from_raw_parts_mut(dst, h * dst_stride + w); let [mut tmp1, mut tmp2] = [tmp1, tmp2].map(|tmp| std::slice::from_raw_parts(tmp, h * w)); let intermediate_bits = bd.get_intermediate_bits(); @@ -857,7 +859,7 @@ pub unsafe fn w_mask_rust( >> mask_sh), 64, ); - *dst.offset(x as isize) = bd.iclip_pixel( + dst[x] = bd.iclip_pixel( tmp1[x] as libc::c_int * m + tmp2[x] as libc::c_int * (64 - m) + rnd >> sh, ); if ss_hor { @@ -867,7 +869,7 @@ pub unsafe fn w_mask_rust( >> mask_sh), 64, ); - *dst.offset(x as isize) = bd.iclip_pixel( + dst[x] = bd.iclip_pixel( tmp1[x] as libc::c_int * n + tmp2[x] as libc::c_int * (64 - n) + rnd >> sh, ); if h & ss_ver as usize != 0 { @@ -886,7 +888,7 @@ pub unsafe fn w_mask_rust( } tmp1 = &tmp1[w..]; tmp2 = &tmp2[w..]; - dst = 
dst.offset(BD::pxstride(dst_stride) as isize); + dst = &mut dst[dst_stride..]; if !ss_ver || h & 1 != 0 { mask = mask.offset((w >> ss_hor as usize) as isize); } From 8f0015bef79821d867124f948ad89f85acabe3ab Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 3 Aug 2023 01:36:06 -0700 Subject: [PATCH 06/12] `fn w_mask_rust`: Make `mask` var a slice early. --- src/mc.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/mc.rs b/src/mc.rs index 5255e9778..d63a58706 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -835,7 +835,7 @@ pub unsafe fn w_mask_rust( tmp2: *const i16, w: usize, h: usize, - mut mask: *mut u8, + mask: *mut u8, sign: libc::c_int, ss_hor: bool, ss_ver: bool, @@ -844,6 +844,8 @@ pub unsafe fn w_mask_rust( let dst_stride = BD::pxstride(dst_stride); let mut dst = std::slice::from_raw_parts_mut(dst, h * dst_stride + w); let [mut tmp1, mut tmp2] = [tmp1, tmp2].map(|tmp| std::slice::from_raw_parts(tmp, h * w)); + let mut mask = + std::slice::from_raw_parts_mut(mask, (w >> ss_hor as usize) * (h >> ss_ver as usize)); let intermediate_bits = bd.get_intermediate_bits(); let bitdepth = bd.bitdepth(); @@ -873,16 +875,14 @@ pub unsafe fn w_mask_rust( tmp1[x] as libc::c_int * n + tmp2[x] as libc::c_int * (64 - n) + rnd >> sh, ); if h & ss_ver as usize != 0 { - *mask.offset((x >> 1) as isize) = - (m + n + *mask.offset((x >> 1) as isize) as libc::c_int + 2 - sign >> 2) - as u8; + mask[x >> 1] = (m + n + mask[x >> 1] as libc::c_int + 2 - sign >> 2) as u8; } else if ss_ver { - *mask.offset((x >> 1) as isize) = (m + n) as u8; + mask[x >> 1] = (m + n) as u8; } else { - *mask.offset((x >> 1) as isize) = (m + n + 1 - sign >> 1) as u8; + mask[x >> 1] = (m + n + 1 - sign >> 1) as u8; } } else { - *mask.offset(x as isize) = m as u8; + mask[x] = m as u8; } x += 1; } @@ -890,7 +890,7 @@ pub unsafe fn w_mask_rust( tmp2 = &tmp2[w..]; dst = &mut dst[dst_stride..]; if !ss_ver || h & 1 != 0 { - mask = mask.offset((w >> ss_hor as usize) as isize); + 
mask = &mut mask[w >> ss_hor as usize..]; } } } From 90f3a7318056d37fbcf776172e3b9c0348a129f9 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 3 Aug 2023 01:39:01 -0700 Subject: [PATCH 07/12] `fn w_mask_rust`: Add parentheses around bitshift operands for clarity. --- src/mc.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mc.rs b/src/mc.rs index d63a58706..1d091aa49 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -857,29 +857,29 @@ pub unsafe fn w_mask_rust( let mut x = 0; while x < w { let m = std::cmp::min( - 38 + ((tmp1[x] as libc::c_int - tmp2[x] as libc::c_int).abs() + mask_rnd + 38 + (((tmp1[x] as libc::c_int - tmp2[x] as libc::c_int).abs() + mask_rnd) >> mask_sh), 64, ); dst[x] = bd.iclip_pixel( - tmp1[x] as libc::c_int * m + tmp2[x] as libc::c_int * (64 - m) + rnd >> sh, + (tmp1[x] as libc::c_int * m + tmp2[x] as libc::c_int * (64 - m) + rnd) >> sh, ); if ss_hor { x += 1; let n = std::cmp::min( - 38 + ((tmp1[x] as libc::c_int - tmp2[x] as libc::c_int).abs() + mask_rnd + 38 + (((tmp1[x] as libc::c_int - tmp2[x] as libc::c_int).abs() + mask_rnd) >> mask_sh), 64, ); dst[x] = bd.iclip_pixel( - tmp1[x] as libc::c_int * n + tmp2[x] as libc::c_int * (64 - n) + rnd >> sh, + (tmp1[x] as libc::c_int * n + tmp2[x] as libc::c_int * (64 - n) + rnd) >> sh, ); if h & ss_ver as usize != 0 { - mask[x >> 1] = (m + n + mask[x >> 1] as libc::c_int + 2 - sign >> 2) as u8; + mask[x >> 1] = ((m + n + mask[x >> 1] as libc::c_int + 2 - sign) >> 2) as u8; } else if ss_ver { mask[x >> 1] = (m + n) as u8; } else { - mask[x >> 1] = (m + n + 1 - sign >> 1) as u8; + mask[x >> 1] = ((m + n + 1 - sign) >> 1) as u8; } } else { mask[x] = m as u8; From 008616e09e34b3926aa2fba58e00c510f4481301 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 3 Aug 2023 01:43:39 -0700 Subject: [PATCH 08/12] `fn w_mask_rust`: Make `sign` arg a `bool`. 
--- src/mc.rs | 6 +++--- src/mc_tmpl_16.rs | 3 ++- src/mc_tmpl_8.rs | 3 ++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/mc.rs b/src/mc.rs index 1d091aa49..b7038898b 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -836,7 +836,7 @@ pub unsafe fn w_mask_rust( w: usize, h: usize, mask: *mut u8, - sign: libc::c_int, + sign: bool, ss_hor: bool, ss_ver: bool, bd: BD, @@ -875,11 +875,11 @@ pub unsafe fn w_mask_rust( (tmp1[x] as libc::c_int * n + tmp2[x] as libc::c_int * (64 - n) + rnd) >> sh, ); if h & ss_ver as usize != 0 { - mask[x >> 1] = ((m + n + mask[x >> 1] as libc::c_int + 2 - sign) >> 2) as u8; + mask[x >> 1] = ((m + n + mask[x >> 1] as libc::c_int + 2 - sign as libc::c_int) >> 2) as u8; } else if ss_ver { mask[x >> 1] = (m + n) as u8; } else { - mask[x >> 1] = ((m + n + 1 - sign) >> 1) as u8; + mask[x >> 1] = ((m + n + 1 - sign as libc::c_int) >> 1) as u8; } } else { mask[x] = m as u8; diff --git a/src/mc_tmpl_16.rs b/src/mc_tmpl_16.rs index 0ae60a9c1..1c303a767 100644 --- a/src/mc_tmpl_16.rs +++ b/src/mc_tmpl_16.rs @@ -3179,6 +3179,7 @@ unsafe extern "C" fn w_mask_c( ss_ver: libc::c_int, bitdepth_max: libc::c_int, ) { + debug_assert!(sign == 0 || sign == 1); w_mask_rust( dst, dst_stride as usize, @@ -3187,7 +3188,7 @@ unsafe extern "C" fn w_mask_c( w as usize, h as usize, mask, - sign, + sign != 0, ss_hor != 0, ss_ver != 0, BitDepth16::new(bitdepth_max as u16), diff --git a/src/mc_tmpl_8.rs b/src/mc_tmpl_8.rs index 2fd352635..df15c4b57 100644 --- a/src/mc_tmpl_8.rs +++ b/src/mc_tmpl_8.rs @@ -3080,6 +3080,7 @@ unsafe extern "C" fn w_mask_c( ss_hor: libc::c_int, ss_ver: libc::c_int, ) { + debug_assert!(sign == 0 || sign == 1); w_mask_rust( dst, dst_stride as usize, @@ -3088,7 +3089,7 @@ unsafe extern "C" fn w_mask_c( w as usize, h as usize, mask, - sign, + sign != 0, ss_hor != 0, ss_ver != 0, BitDepth8::new(()), From 49035f4d4142b3a7f0a53f7dd966437d1f6b2c43 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 3 Aug 2023 01:50:45 -0700 Subject: 
[PATCH 09/12] `fn w_mask_rust`: Make `m`, `n` vars `u8`s and use `a.abs_diff(b)` instead of `(a - b).abs()`. --- src/mc.rs | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/src/mc.rs b/src/mc.rs index b7038898b..f2753f24a 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -846,6 +846,7 @@ pub unsafe fn w_mask_rust( let [mut tmp1, mut tmp2] = [tmp1, tmp2].map(|tmp| std::slice::from_raw_parts(tmp, h * w)); let mut mask = std::slice::from_raw_parts_mut(mask, (w >> ss_hor as usize) * (h >> ss_ver as usize)); + let sign = sign as u8; let intermediate_bits = bd.get_intermediate_bits(); let bitdepth = bd.bitdepth(); @@ -856,33 +857,27 @@ pub unsafe fn w_mask_rust( for h in 0..h { let mut x = 0; while x < w { - let m = std::cmp::min( - 38 + (((tmp1[x] as libc::c_int - tmp2[x] as libc::c_int).abs() + mask_rnd) - >> mask_sh), - 64, - ); + let m = + std::cmp::min(38 + ((tmp1[x].abs_diff(tmp2[x]) + mask_rnd) >> mask_sh), 64) as u8; dst[x] = bd.iclip_pixel( - (tmp1[x] as libc::c_int * m + tmp2[x] as libc::c_int * (64 - m) + rnd) >> sh, + (tmp1[x] as i32 * m as i32 + tmp2[x] as i32 * (64 - m as i32) + rnd) >> sh, ); if ss_hor { x += 1; - let n = std::cmp::min( - 38 + (((tmp1[x] as libc::c_int - tmp2[x] as libc::c_int).abs() + mask_rnd) - >> mask_sh), - 64, - ); + let n = std::cmp::min(38 + ((tmp1[x].abs_diff(tmp2[x]) + mask_rnd) >> mask_sh), 64) + as u8; dst[x] = bd.iclip_pixel( - (tmp1[x] as libc::c_int * n + tmp2[x] as libc::c_int * (64 - n) + rnd) >> sh, + (tmp1[x] as i32 * n as i32 + tmp2[x] as i32 * (64 - n as i32) + rnd) >> sh, ); if h & ss_ver as usize != 0 { - mask[x >> 1] = ((m + n + mask[x >> 1] as libc::c_int + 2 - sign as libc::c_int) >> 2) as u8; + mask[x >> 1] = ((m + n + mask[x >> 1] + 2 - sign) >> 2) as u8; } else if ss_ver { - mask[x >> 1] = (m + n) as u8; + mask[x >> 1] = m + n; } else { - mask[x >> 1] = ((m + n + 1 - sign as libc::c_int) >> 1) as u8; + mask[x >> 1] = ((m + n + 1 - sign) >> 1) as u8; } } else { - mask[x] = m 
as u8; + mask[x] = m; } x += 1; } From ff4ef019a577cf5d9011696777b1a443c04d9b8f Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 3 Aug 2023 02:00:41 -0700 Subject: [PATCH 10/12] `fn w_mask_rust`: Use iterators to elide many bounds checks and make the guessed length more accurate. --- src/mc.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/mc.rs b/src/mc.rs index f2753f24a..bf618b281 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -842,8 +842,9 @@ pub unsafe fn w_mask_rust( bd: BD, ) { let dst_stride = BD::pxstride(dst_stride); - let mut dst = std::slice::from_raw_parts_mut(dst, h * dst_stride + w); - let [mut tmp1, mut tmp2] = [tmp1, tmp2].map(|tmp| std::slice::from_raw_parts(tmp, h * w)); + let dst = + std::slice::from_raw_parts_mut(dst, if h == 0 { 0 } else { (h - 1) * dst_stride + w }); + let [tmp1, tmp2] = [tmp1, tmp2].map(|tmp| std::slice::from_raw_parts(tmp, h * w)); let mut mask = std::slice::from_raw_parts_mut(mask, (w >> ss_hor as usize) * (h >> ss_ver as usize)); let sign = sign as u8; @@ -854,7 +855,10 @@ pub unsafe fn w_mask_rust( let rnd = (32 << intermediate_bits) + i32::from(BD::PREP_BIAS) * 64; let mask_sh = bitdepth + intermediate_bits - 4; let mask_rnd = 1 << (mask_sh - 5); - for h in 0..h { + for (h, ((tmp1, tmp2), dst)) in iter::zip(tmp1.chunks_exact(w), tmp2.chunks_exact(w)) + .zip(dst.chunks_mut(dst_stride)) + .enumerate() + { let mut x = 0; while x < w { let m = @@ -881,9 +885,6 @@ pub unsafe fn w_mask_rust( } x += 1; } - tmp1 = &tmp1[w..]; - tmp2 = &tmp2[w..]; - dst = &mut dst[dst_stride..]; if !ss_ver || h & 1 != 0 { mask = &mut mask[w >> ss_hor as usize..]; } From 7b5a704b114af6e571cc2b4ebbde06ccacb5dd70 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 4 Aug 2023 15:30:11 -0700 Subject: [PATCH 11/12] `fn w_mask_rust`: Add back comments and line-breaks from C.
--- src/mc.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/mc.rs b/src/mc.rs index bf618b281..b65124c60 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -849,6 +849,8 @@ pub unsafe fn w_mask_rust( std::slice::from_raw_parts_mut(mask, (w >> ss_hor as usize) * (h >> ss_ver as usize)); let sign = sign as u8; + // store mask at 2x2 resolution, i.e. store 2x1 sum for even rows, + // and then load this intermediate to calculate final value for odd rows let intermediate_bits = bd.get_intermediate_bits(); let bitdepth = bd.bitdepth(); let sh = intermediate_bits + 6; @@ -866,13 +868,16 @@ pub unsafe fn w_mask_rust( dst[x] = bd.iclip_pixel( (tmp1[x] as i32 * m as i32 + tmp2[x] as i32 * (64 - m as i32) + rnd) >> sh, ); + if ss_hor { x += 1; + let n = std::cmp::min(38 + ((tmp1[x].abs_diff(tmp2[x]) + mask_rnd) >> mask_sh), 64) as u8; dst[x] = bd.iclip_pixel( (tmp1[x] as i32 * n as i32 + tmp2[x] as i32 * (64 - n as i32) + rnd) >> sh, ); + if h & ss_ver as usize != 0 { mask[x >> 1] = ((m + n + mask[x >> 1] + 2 - sign) >> 2) as u8; } else if ss_ver { @@ -885,6 +890,7 @@ pub unsafe fn w_mask_rust( } x += 1; } + if !ss_ver || h & 1 != 0 { mask = &mut mask[w >> ss_hor as usize..]; } From e6036f1bcd553b98635b25b1fab369699673d646 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 4 Aug 2023 15:47:06 -0700 Subject: [PATCH 12/12] `fn w_mask_rust`: Extract `mask[x >> 1]` assignment from an `if {} else if {} else`. 
--- src/mc.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/mc.rs b/src/mc.rs index b65124c60..6ee8e4228 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -878,13 +878,13 @@ pub unsafe fn w_mask_rust( (tmp1[x] as i32 * n as i32 + tmp2[x] as i32 * (64 - n as i32) + rnd) >> sh, ); - if h & ss_ver as usize != 0 { - mask[x >> 1] = ((m + n + mask[x >> 1] + 2 - sign) >> 2) as u8; + mask[x >> 1] = if h & ss_ver as usize != 0 { + ((m + n + mask[x >> 1] + 2 - sign) >> 2) as u8 } else if ss_ver { - mask[x >> 1] = m + n; + m + n } else { - mask[x >> 1] = ((m + n + 1 - sign) >> 1) as u8; - } + ((m + n + 1 - sign) >> 1) as u8 + }; } else { mask[x] = m; }