From 5e455b660b1d6d46500939be6c8d21c9fca169db Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Wed, 28 Jun 2023 20:01:09 -0700 Subject: [PATCH 01/12] `fn avg_{c => rust}`: Deduplicate w/ generics and cleanup/re-translate. --- src/mc.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/mc.rs b/src/mc.rs index 1932b4707..915b3cc5d 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -661,3 +661,30 @@ pub unsafe fn prep_bilin_scaled_rust( tmp = tmp.offset(w as isize); } } + +unsafe fn avg_rust( + bd: BD, + mut dst: *mut BD::Pixel, + dst_stride: usize, + mut tmp1: *const i16, + mut tmp2: *const i16, + w: usize, + h: usize, +) { + let intermediate_bits = bd.get_intermediate_bits(); + let sh = intermediate_bits + 1; + let rnd = (1 << intermediate_bits) + i32::from(BD::PREP_BIAS) * 2; + let dst_stride = BD::pxstride(dst_stride); + for _ in 0..h { + for x in 0..w { + *dst.offset(x as isize) = bd.iclip_pixel( + ((*tmp1.offset(x as isize) as i32 + *tmp2.offset(x as isize) as i32 + rnd) >> sh) + .into(), + ); + } + + tmp1 = tmp1.offset(w as isize); + tmp2 = tmp2.offset(w as isize); + dst = dst.offset(dst_stride as isize); + } +} From d066062ced58931082b61f72a1eaaca2af64dfd9 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Wed, 28 Jun 2023 20:01:09 -0700 Subject: [PATCH 02/12] `fn avg_rust`: Deduplicate calls. --- src/mc.rs | 5 +++-- src/mc_tmpl_16.rs | 34 ++++++++++------------------------ src/mc_tmpl_8.rs | 32 ++++++++++---------------------- 3 files changed, 23 insertions(+), 48 deletions(-) diff --git a/src/mc.rs b/src/mc.rs index 915b3cc5d..0ecfb56af 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -662,14 +662,15 @@ pub unsafe fn prep_bilin_scaled_rust( } } -unsafe fn avg_rust( - bd: BD, +// TODO(kkysen) temporarily `pub` until `mc` callers are deduplicated +pub unsafe fn avg_rust( mut dst: *mut BD::Pixel, dst_stride: usize, mut tmp1: *const i16, mut tmp2: *const i16, w: usize, h: usize, + bd: BD, ) { let intermediate_bits = bd.get_intermediate_bits(); let sh = intermediate_bits + 1; diff --git a/src/mc_tmpl_16.rs b/src/mc_tmpl_16.rs index 1d1f2a37c..c9ed7cc02 100644 --- a/src/mc_tmpl_16.rs +++ b/src/mc_tmpl_16.rs @@ -3072,6 +3072,7 @@ unsafe extern "C" fn prep_bilin_scaled_c( BitDepth16::new(bitdepth_max as u16), ) } +use crate::src::mc::avg_rust; unsafe extern "C" fn avg_c( mut dst: *mut pixel, dst_stride: ptrdiff_t, @@ -3081,30 +3082,15 @@ unsafe extern "C" fn avg_c( mut h: libc::c_int, bitdepth_max: libc::c_int, ) { - let intermediate_bits = 14 as libc::c_int - (32 - clz(bitdepth_max as libc::c_uint)); - let sh = intermediate_bits + 1; - let rnd = ((1 as libc::c_int) << intermediate_bits) + 8192 * 2; - loop { - let mut x = 0; - while x < w { - *dst.offset(x as isize) = iclip( - *tmp1.offset(x as isize) as libc::c_int - + *tmp2.offset(x as isize) as libc::c_int - + rnd - >> sh, - 0 as libc::c_int, - bitdepth_max, - ) as pixel; - x += 1; - } - tmp1 = tmp1.offset(w as isize); - tmp2 = tmp2.offset(w as isize); - dst = dst.offset(PXSTRIDE(dst_stride) as isize); - h -= 1; - if !(h != 0) { - break; - } - } + avg_rust( + dst, + dst_stride as usize, + tmp1, + tmp2, + w as usize, + h as usize, + BitDepth16::new(bitdepth_max as u16), + ) } unsafe extern "C" fn w_avg_c( mut dst: *mut pixel, diff --git a/src/mc_tmpl_8.rs b/src/mc_tmpl_8.rs index 0d6e6eb54..f6389df52 100644 --- a/src/mc_tmpl_8.rs +++ b/src/mc_tmpl_8.rs @@ -2977,6 +2977,7 @@ unsafe extern "C" fn prep_bilin_scaled_c( BitDepth8::new(()), ) } +use crate::src::mc::avg_rust; unsafe extern "C" fn avg_c( mut dst: *mut pixel, dst_stride: ptrdiff_t, @@ -2985,28 +2986,15 @@ unsafe extern "C" fn avg_c( w: libc::c_int, mut h: libc::c_int, ) { - let intermediate_bits = 4; - let sh = intermediate_bits + 1; - let rnd = ((1 as libc::c_int) << intermediate_bits) + 0 * 2; - loop { - let mut x = 0; - while x < w { - *dst.offset(x as isize) = iclip_u8( - *tmp1.offset(x as isize) as libc::c_int - + *tmp2.offset(x as isize) as libc::c_int - + rnd - >> sh, - ) as pixel; - x += 1; - } - tmp1 = tmp1.offset(w as isize); - tmp2 = tmp2.offset(w as isize); - dst = dst.offset(dst_stride as isize); - h -= 1; - if !(h != 0) { - break; - } - } + avg_rust( + dst, + dst_stride as usize, + tmp1, + tmp2, + w as usize, + h as usize, + BitDepth8::new(()), + ) } unsafe extern "C" fn w_avg_c( mut dst: *mut pixel, From 3d322c87ed777b68c20195dd9dbf22da84baa458 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Wed, 28 Jun 2023 20:08:14 -0700 Subject: [PATCH 03/12] `fn w_avg_{c => rust}`: Deduplicate w/ generics and cleanup/re-translate. --- src/mc.rs | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/mc.rs b/src/mc.rs index 0ecfb56af..50559b874 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -689,3 +689,33 @@ pub unsafe fn avg_rust( dst = dst.offset(dst_stride as isize); } } + +unsafe fn w_avg_rust( + bd: BD, + mut dst: *mut BD::Pixel, + dst_stride: usize, + mut tmp1: *const i16, + mut tmp2: *const i16, + w: usize, + h: usize, + weight: i32, +) { + let intermediate_bits = bd.get_intermediate_bits(); + let sh = intermediate_bits + 4; + let rnd = (8 << intermediate_bits) + i32::from(BD::PREP_BIAS) * 16; + let dst_stride = BD::pxstride(dst_stride); + for _ in 0..h { + for x in 0..w { + *dst.offset(x as isize) = bd.iclip_pixel( + (*tmp1.offset(x as isize) as i32 * weight + + *tmp2.offset(x as isize) as i32 * (16 - weight) + + rnd) + >> sh, + ); + } + + tmp1 = tmp1.offset(w as isize); + tmp2 = tmp2.offset(w as isize); + dst = dst.offset(dst_stride as isize); + } +} From 9e093f942fddde63fd8a2ef4296033f3ba4b0027 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Wed, 28 Jun 2023 20:08:14 -0700 Subject: [PATCH 04/12] `fn w_avg_rust`: Deduplicate calls. --- src/mc.rs | 5 +++-- src/mc_tmpl_16.rs | 43 +++++++++++++++---------------------------- src/mc_tmpl_8.rs | 41 +++++++++++++++-------------------------- 3 files changed, 33 insertions(+), 56 deletions(-) diff --git a/src/mc.rs b/src/mc.rs index 50559b874..2d3ceca35 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -690,8 +690,8 @@ pub unsafe fn avg_rust( } } -unsafe fn w_avg_rust( - bd: BD, +// TODO(kkysen) temporarily `pub` until `mc` callers are deduplicated +pub unsafe fn w_avg_rust( mut dst: *mut BD::Pixel, dst_stride: usize, mut tmp1: *const i16, @@ -699,6 +699,7 @@ unsafe fn w_avg_rust( w: usize, h: usize, weight: i32, + bd: BD, ) { let intermediate_bits = bd.get_intermediate_bits(); let sh = intermediate_bits + 4; diff --git a/src/mc_tmpl_16.rs b/src/mc_tmpl_16.rs index c9ed7cc02..70eafc98d 100644 --- a/src/mc_tmpl_16.rs +++ b/src/mc_tmpl_16.rs @@ -3092,40 +3092,27 @@ unsafe extern "C" fn avg_c( BitDepth16::new(bitdepth_max as u16), ) } +use crate::src::mc::w_avg_rust; unsafe extern "C" fn w_avg_c( - mut dst: *mut pixel, + dst: *mut pixel, dst_stride: ptrdiff_t, - mut tmp1: *const int16_t, - mut tmp2: *const int16_t, + tmp1: *const int16_t, + tmp2: *const int16_t, w: libc::c_int, - mut h: libc::c_int, + h: libc::c_int, weight: libc::c_int, bitdepth_max: libc::c_int, ) { - let intermediate_bits = 14 as libc::c_int - (32 - clz(bitdepth_max as libc::c_uint)); - let sh = intermediate_bits + 4; - let rnd = ((8 as libc::c_int) << intermediate_bits) + 8192 * 16; - loop { - let mut x = 0; - while x < w { - *dst.offset(x as isize) = iclip( - *tmp1.offset(x as isize) as libc::c_int * weight - + *tmp2.offset(x as isize) as libc::c_int * (16 - weight) - + rnd - >> sh, - 0 as libc::c_int, - bitdepth_max, - ) as pixel; - x += 1; - } - tmp1 = tmp1.offset(w as isize); - tmp2 = tmp2.offset(w as isize); - dst = dst.offset(PXSTRIDE(dst_stride) as isize); - h -= 1; - if !(h != 0) { - break; - } - } + w_avg_rust( + dst, + dst_stride as usize, + tmp1, + tmp2, + w as usize, + h as usize, + weight, + BitDepth16::new(bitdepth_max as u16), + ) } unsafe extern "C" fn mask_c( mut dst: *mut pixel, diff --git a/src/mc_tmpl_8.rs b/src/mc_tmpl_8.rs index f6389df52..a90e884b7 100644 --- a/src/mc_tmpl_8.rs +++ b/src/mc_tmpl_8.rs @@ -2996,37 +2996,26 @@ unsafe extern "C" fn avg_c( BitDepth8::new(()), ) } +use crate::src::mc::w_avg_rust; unsafe extern "C" fn w_avg_c( - mut dst: *mut pixel, + dst: *mut pixel, dst_stride: ptrdiff_t, - mut tmp1: *const int16_t, - mut tmp2: *const int16_t, + tmp1: *const int16_t, + tmp2: *const int16_t, w: libc::c_int, - mut h: libc::c_int, + h: libc::c_int, weight: libc::c_int, ) { - let intermediate_bits = 4; - let sh = intermediate_bits + 4; - let rnd = ((8 as libc::c_int) << intermediate_bits) + 0 * 16; - loop { - let mut x = 0; - while x < w { - *dst.offset(x as isize) = iclip_u8( - *tmp1.offset(x as isize) as libc::c_int * weight - + *tmp2.offset(x as isize) as libc::c_int * (16 - weight) - + rnd - >> sh, - ) as pixel; - x += 1; - } - tmp1 = tmp1.offset(w as isize); - tmp2 = tmp2.offset(w as isize); - dst = dst.offset(dst_stride as isize); - h -= 1; - if !(h != 0) { - break; - } - } + w_avg_rust( + dst, + dst_stride as usize, + tmp1, + tmp2, + w as usize, + h as usize, + weight, + BitDepth8::new(()), + ) } unsafe extern "C" fn mask_c( mut dst: *mut pixel, From 48ac22c55aa374d70ad8bfba45b6cc390de01152 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Wed, 28 Jun 2023 21:08:06 -0700 Subject: [PATCH 05/12] `fn mask_{c => rust}`: Deduplicate w/ generics and cleanup/re-translate. --- src/mc.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/mc.rs b/src/mc.rs index 2d3ceca35..b941916de 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -720,3 +720,34 @@ pub unsafe fn w_avg_rust( dst = dst.offset(dst_stride as isize); } } + +unsafe fn mask_rust( + bd: BD, + mut dst: *mut BD::Pixel, + dst_stride: usize, + mut tmp1: *const i16, + mut tmp2: *const i16, + w: usize, + h: usize, + mut mask: *const u8, +) { + let intermediate_bits = bd.get_intermediate_bits(); + let sh = intermediate_bits + 6; + let rnd = (32 << intermediate_bits) + i32::from(BD::PREP_BIAS) * 64; + let dst_stride = BD::pxstride(dst_stride); + for _ in 0..h { + for x in 0..w { + *dst.offset(x as isize) = bd.iclip_pixel( + (*tmp1.offset(x as isize) as i32 * *mask.offset(x as isize) as i32 + + *tmp2.offset(x as isize) as i32 * (64 - *mask.offset(x as isize) as i32) + + rnd) + >> sh, + ); + } + + tmp1 = tmp1.offset(w as isize); + tmp2 = tmp2.offset(w as isize); + mask = mask.offset(w as isize); + dst = dst.offset(dst_stride as isize); + } +} From b846f1c3a52e6d3ead768d372e54ee057de0122a Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Wed, 28 Jun 2023 21:08:06 -0700 Subject: [PATCH 06/12] `fn mask_rust`: Deduplicate calls. --- src/mc.rs | 5 +++-- src/mc_tmpl_16.rs | 47 ++++++++++++++++------------------------------- src/mc_tmpl_8.rs | 45 ++++++++++++++++----------------------------- 3 files changed, 35 insertions(+), 62 deletions(-) diff --git a/src/mc.rs b/src/mc.rs index b941916de..c3551f240 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -721,8 +721,8 @@ pub unsafe fn w_avg_rust( } } -unsafe fn mask_rust( - bd: BD, +// TODO(kkysen) temporarily `pub` until `mc` callers are deduplicated +pub unsafe fn mask_rust( mut dst: *mut BD::Pixel, dst_stride: usize, mut tmp1: *const i16, @@ -730,6 +730,7 @@ unsafe fn mask_rust( w: usize, h: usize, mut mask: *const u8, + bd: BD, ) { let intermediate_bits = bd.get_intermediate_bits(); let sh = intermediate_bits + 6; diff --git a/src/mc_tmpl_16.rs b/src/mc_tmpl_16.rs index 70eafc98d..60d403974 100644 --- a/src/mc_tmpl_16.rs +++ b/src/mc_tmpl_16.rs @@ -3114,42 +3114,27 @@ unsafe extern "C" fn w_avg_c( BitDepth16::new(bitdepth_max as u16), ) } +use crate::src::mc::mask_rust; unsafe extern "C" fn mask_c( - mut dst: *mut pixel, + dst: *mut pixel, dst_stride: ptrdiff_t, - mut tmp1: *const int16_t, - mut tmp2: *const int16_t, + tmp1: *const int16_t, + tmp2: *const int16_t, w: libc::c_int, - mut h: libc::c_int, - mut mask: *const uint8_t, + h: libc::c_int, + mask: *const uint8_t, bitdepth_max: libc::c_int, ) { - let intermediate_bits = 14 as libc::c_int - (32 - clz(bitdepth_max as libc::c_uint)); - let sh = intermediate_bits + 6; - let rnd = ((32 as libc::c_int) << intermediate_bits) + 8192 * 64; - loop { - let mut x = 0; - while x < w { - *dst.offset(x as isize) = iclip( - *tmp1.offset(x as isize) as libc::c_int * *mask.offset(x as isize) as libc::c_int - + *tmp2.offset(x as isize) as libc::c_int - * (64 - *mask.offset(x as isize) as libc::c_int) - + rnd - >> sh, - 0 as libc::c_int, - bitdepth_max, - ) as pixel; - x += 1; - } - tmp1 = tmp1.offset(w as isize); - tmp2 = tmp2.offset(w as isize); - mask = mask.offset(w as isize); - dst = dst.offset(PXSTRIDE(dst_stride) as isize); - h -= 1; - if !(h != 0) { - break; - } - } + mask_rust( + dst, + dst_stride as usize, + tmp1, + tmp2, + w as usize, + h as usize, + mask, + BitDepth16::new(bitdepth_max as u16), + ) } unsafe extern "C" fn blend_c( mut dst: *mut pixel, diff --git a/src/mc_tmpl_8.rs b/src/mc_tmpl_8.rs index a90e884b7..97ca1a237 100644 --- a/src/mc_tmpl_8.rs +++ b/src/mc_tmpl_8.rs @@ -3017,39 +3017,26 @@ unsafe extern "C" fn w_avg_c( BitDepth8::new(()), ) } +use crate::src::mc::mask_rust; unsafe extern "C" fn mask_c( - mut dst: *mut pixel, + dst: *mut pixel, dst_stride: ptrdiff_t, - mut tmp1: *const int16_t, - mut tmp2: *const int16_t, + tmp1: *const int16_t, + tmp2: *const int16_t, w: libc::c_int, - mut h: libc::c_int, - mut mask: *const uint8_t, + h: libc::c_int, + mask: *const uint8_t, ) { - let intermediate_bits = 4; - let sh = intermediate_bits + 6; - let rnd = ((32 as libc::c_int) << intermediate_bits) + 0 * 64; - loop { - let mut x = 0; - while x < w { - *dst.offset(x as isize) = iclip_u8( - *tmp1.offset(x as isize) as libc::c_int * *mask.offset(x as isize) as libc::c_int - + *tmp2.offset(x as isize) as libc::c_int - * (64 - *mask.offset(x as isize) as libc::c_int) - + rnd - >> sh, - ) as pixel; - x += 1; - } - tmp1 = tmp1.offset(w as isize); - tmp2 = tmp2.offset(w as isize); - mask = mask.offset(w as isize); - dst = dst.offset(dst_stride as isize); - h -= 1; - if !(h != 0) { - break; - } - } + mask_rust( + dst, + dst_stride as usize, + tmp1, + tmp2, + w as usize, + h as usize, + mask, + BitDepth8::new(()), + ) } unsafe extern "C" fn blend_c( mut dst: *mut pixel, From 1cb585b12d05854aa9d899a68bde3ed1cfd6dd53 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Wed, 28 Jun 2023 21:43:57 -0700 Subject: [PATCH 07/12] `fn blend_{c => rust}`: Deduplicate w/ generics and cleanup/re-translate. This also adds arithmetic traits on `BitDepth::Pixel` to allow for such math. --- include/common/bitdepth.rs | 5 ++++- src/mc.rs | 29 +++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/include/common/bitdepth.rs b/include/common/bitdepth.rs index 66abc860e..8201d23b7 100644 --- a/include/common/bitdepth.rs +++ b/include/common/bitdepth.rs @@ -1,6 +1,6 @@ use std::ffi::{c_int, c_uint}; use std::fmt::{self, Display, Formatter}; -use std::ops::Add; +use std::ops::{Add, Mul, Shr}; use crate::include::common::intops::clip; @@ -77,6 +77,9 @@ pub trait BitDepth: Clone + Copy { type Pixel: Copy + Ord + + Add + + Mul + + Shr + From + Into + TryFrom diff --git a/src/mc.rs b/src/mc.rs index c3551f240..cdbbebec7 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -752,3 +752,32 @@ pub unsafe fn mask_rust( dst = dst.offset(dst_stride as isize); } } + +fn blend_px(a: BD::Pixel, b: BD::Pixel, m: u8) -> BD::Pixel { + let m = m as u32; + ((a.as_::() * (64 - m) + b.as_::() * m + 32) >> 6).as_::() +} + +unsafe fn blend_rust( + mut dst: *mut BD::Pixel, + dst_stride: usize, + mut tmp: *const BD::Pixel, + w: usize, + h: usize, + mut mask: *const u8, +) { + let dst_stride = BD::pxstride(dst_stride); + for _ in 0..h { + for x in 0..w { + *dst.offset(x as isize) = blend_px::( + *dst.offset(x as isize), + *tmp.offset(x as isize), + *mask.offset(x as isize), + ) + } + + dst = dst.offset(dst_stride as isize); + tmp = tmp.offset(w as isize); + mask = mask.offset(w as isize); + } +} From cabbef7d0355e83a547fbd065e6e66448c073ab5 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Mon, 24 Jul 2023 16:43:29 -0700 Subject: [PATCH 08/12] `fn blend_rust`: Deduplicate calls. --- src/mc.rs | 3 ++- src/mc_tmpl_16.rs | 28 ++++++---------------------- src/mc_tmpl_8.rs | 28 ++++++---------------------- 3 files changed, 14 insertions(+), 45 deletions(-) diff --git a/src/mc.rs b/src/mc.rs index cdbbebec7..6dbc841e8 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -758,7 +758,8 @@ fn blend_px(a: BD::Pixel, b: BD::Pixel, m: u8) -> BD::Pixel { ((a.as_::() * (64 - m) + b.as_::() * m + 32) >> 6).as_::() } -unsafe fn blend_rust( +// TODO(kkysen) temporarily `pub` until `mc` callers are deduplicated +pub unsafe fn blend_rust( mut dst: *mut BD::Pixel, dst_stride: usize, mut tmp: *const BD::Pixel, diff --git a/src/mc_tmpl_16.rs b/src/mc_tmpl_16.rs index 60d403974..17597edf5 100644 --- a/src/mc_tmpl_16.rs +++ b/src/mc_tmpl_16.rs @@ -3136,32 +3136,16 @@ unsafe extern "C" fn mask_c( BitDepth16::new(bitdepth_max as u16), ) } +use crate::src::mc::blend_rust; unsafe extern "C" fn blend_c( - mut dst: *mut pixel, + dst: *mut pixel, dst_stride: ptrdiff_t, - mut tmp: *const pixel, + tmp: *const pixel, w: libc::c_int, - mut h: libc::c_int, - mut mask: *const uint8_t, + h: libc::c_int, + mask: *const uint8_t, ) { - loop { - let mut x = 0; - while x < w { - *dst.offset(x as isize) = (*dst.offset(x as isize) as libc::c_int - * (64 - *mask.offset(x as isize) as libc::c_int) - + *tmp.offset(x as isize) as libc::c_int * *mask.offset(x as isize) as libc::c_int - + 32 - >> 6) as pixel; - x += 1; - } - dst = dst.offset(PXSTRIDE(dst_stride) as isize); - tmp = tmp.offset(w as isize); - mask = mask.offset(w as isize); - h -= 1; - if !(h != 0) { - break; - } - } + blend_rust::(dst, dst_stride as usize, tmp, w as usize, h as usize, mask) } unsafe extern "C" fn blend_v_c( mut dst: *mut pixel, diff --git a/src/mc_tmpl_8.rs b/src/mc_tmpl_8.rs index 97ca1a237..30eb8bbac 100644 --- a/src/mc_tmpl_8.rs +++ b/src/mc_tmpl_8.rs @@ -3038,32 +3038,16 @@ unsafe extern "C" fn mask_c( BitDepth8::new(()), ) } +use crate::src::mc::blend_rust; unsafe extern "C" fn blend_c( - mut dst: *mut pixel, + dst: *mut pixel, dst_stride: ptrdiff_t, - mut tmp: *const pixel, + tmp: *const pixel, w: libc::c_int, - mut h: libc::c_int, - mut mask: *const uint8_t, + h: libc::c_int, + mask: *const uint8_t, ) { - loop { - let mut x = 0; - while x < w { - *dst.offset(x as isize) = (*dst.offset(x as isize) as libc::c_int - * (64 - *mask.offset(x as isize) as libc::c_int) - + *tmp.offset(x as isize) as libc::c_int * *mask.offset(x as isize) as libc::c_int - + 32 - >> 6) as pixel; - x += 1; - } - dst = dst.offset(dst_stride as isize); - tmp = tmp.offset(w as isize); - mask = mask.offset(w as isize); - h -= 1; - if !(h != 0) { - break; - } - } + blend_rust::(dst, dst_stride as usize, tmp, w as usize, h as usize, mask) } unsafe extern "C" fn blend_v_c( mut dst: *mut pixel, From c6540b69c3f98edb7292cdd2592779f15b593e9b Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Wed, 28 Jun 2023 21:54:28 -0700 Subject: [PATCH 09/12] `fn blend_v_{c => rust}`: Deduplicate w/ generics and cleanup/re-translate. --- src/mc.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/mc.rs b/src/mc.rs index 6dbc841e8..7263f6ec2 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -3,6 +3,7 @@ use std::iter; use crate::include::common::bitdepth::{AsPrimitive, BitDepth}; use crate::include::dav1d::headers::Dav1dFilterMode; use crate::src::tables::dav1d_mc_subpel_filters; +use crate::src::tables::dav1d_obmc_masks; // TODO(kkysen) temporarily `pub` until `mc` callers are deduplicated #[inline(never)] @@ -782,3 +783,23 @@ pub unsafe fn blend_rust( mask = mask.offset(w as isize); } } + +unsafe fn blend_v_rust( + mut dst: *mut BD::Pixel, + dst_stride: usize, + mut tmp: *const BD::Pixel, + w: usize, + h: usize, +) { + let mask = &dav1d_obmc_masks.0[w..]; + let dst_stride = BD::pxstride(dst_stride); + for _ in 0..h { + for x in 0..(w * 3 >> 2) { + *dst.offset(x as isize) = + blend_px::(*dst.offset(x as isize), *tmp.offset(x as isize), mask[x]) + } + + dst = dst.offset(dst_stride as isize); + tmp = tmp.offset(w as isize); + } +} From 0d6184d0a97110d61d3eb4fe0e35d90492d74223 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Mon, 24 Jul 2023 16:46:10 -0700 Subject: [PATCH 10/12] `fn blend_v_rust`: Deduplicate calls. --- src/mc.rs | 3 ++- src/mc_tmpl_16.rs | 26 +++++--------------------- src/mc_tmpl_8.rs | 26 +++++--------------------- 3 files changed, 12 insertions(+), 43 deletions(-) diff --git a/src/mc.rs b/src/mc.rs index 7263f6ec2..46dcc3a63 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -784,7 +784,8 @@ pub unsafe fn blend_rust( } } -unsafe fn blend_v_rust( +// TODO(kkysen) temporarily `pub` until `mc` callers are deduplicated +pub unsafe fn blend_v_rust( mut dst: *mut BD::Pixel, dst_stride: usize, mut tmp: *const BD::Pixel, diff --git a/src/mc_tmpl_16.rs b/src/mc_tmpl_16.rs index 17597edf5..9c42f19bb 100644 --- a/src/mc_tmpl_16.rs +++ b/src/mc_tmpl_16.rs @@ -3147,31 +3147,15 @@ unsafe extern "C" fn blend_c( ) { blend_rust::(dst, dst_stride as usize, tmp, w as usize, h as usize, mask) } +use crate::src::mc::blend_v_rust; unsafe extern "C" fn blend_v_c( - mut dst: *mut pixel, + dst: *mut pixel, dst_stride: ptrdiff_t, - mut tmp: *const pixel, + tmp: *const pixel, w: libc::c_int, - mut h: libc::c_int, + h: libc::c_int, ) { - let mask: *const uint8_t = &*dav1d_obmc_masks.0.as_ptr().offset(w as isize) as *const uint8_t; - loop { - let mut x = 0; - while x < w * 3 >> 2 { - *dst.offset(x as isize) = (*dst.offset(x as isize) as libc::c_int - * (64 - *mask.offset(x as isize) as libc::c_int) - + *tmp.offset(x as isize) as libc::c_int * *mask.offset(x as isize) as libc::c_int - + 32 - >> 6) as pixel; - x += 1; - } - dst = dst.offset(PXSTRIDE(dst_stride) as isize); - tmp = tmp.offset(w as isize); - h -= 1; - if !(h != 0) { - break; - } - } + blend_v_rust::(dst, dst_stride as usize, tmp, w as usize, h as usize) } unsafe extern "C" fn blend_h_c( mut dst: *mut pixel, diff --git a/src/mc_tmpl_8.rs b/src/mc_tmpl_8.rs index 30eb8bbac..ff97e52a2 100644 --- a/src/mc_tmpl_8.rs +++ b/src/mc_tmpl_8.rs @@ -3049,31 +3049,15 @@ unsafe extern "C" fn blend_c( ) { blend_rust::(dst, dst_stride as usize, tmp, w as usize, h as usize, mask) } +use crate::src::mc::blend_v_rust; unsafe extern "C" fn blend_v_c( - mut dst: *mut pixel, + dst: *mut pixel, dst_stride: ptrdiff_t, - mut tmp: *const pixel, + tmp: *const pixel, w: libc::c_int, - mut h: libc::c_int, + h: libc::c_int, ) { - let mask: *const uint8_t = &*dav1d_obmc_masks.0.as_ptr().offset(w as isize) as *const uint8_t; - loop { - let mut x = 0; - while x < w * 3 >> 2 { - *dst.offset(x as isize) = (*dst.offset(x as isize) as libc::c_int - * (64 - *mask.offset(x as isize) as libc::c_int) - + *tmp.offset(x as isize) as libc::c_int * *mask.offset(x as isize) as libc::c_int - + 32 - >> 6) as pixel; - x += 1; - } - dst = dst.offset(dst_stride as isize); - tmp = tmp.offset(w as isize); - h -= 1; - if !(h != 0) { - break; - } - } + blend_v_rust::(dst, dst_stride as usize, tmp, w as usize, h as usize) } unsafe extern "C" fn blend_h_c( mut dst: *mut pixel, From 202cd447f37282e63b71ed6ba79d49079e90dccf Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Wed, 28 Jun 2023 21:58:57 -0700 Subject: [PATCH 11/12] `fn blend_h_{c => rust}`: Deduplicate w/ generics and cleanup/re-translate. --- src/mc.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/mc.rs b/src/mc.rs index 46dcc3a63..d788338a0 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -804,3 +804,24 @@ pub unsafe fn blend_v_rust( tmp = tmp.offset(w as isize); } } + +unsafe fn blend_h_rust( + mut dst: *mut BD::Pixel, + dst_stride: usize, + mut tmp: *const BD::Pixel, + w: usize, + h: usize, +) { + let mask = &dav1d_obmc_masks.0[h..]; + let h = h * 3 >> 2; + let dst_stride = BD::pxstride(dst_stride); + for y in 0..h { + for x in 0..w { + *dst.offset(x as isize) = + blend_px::(*dst.offset(x as isize), *tmp.offset(x as isize), mask[y]); + } + + dst = dst.offset(dst_stride as isize); + tmp = tmp.offset(w as isize); + } +} From 8ede03cf7d05253dc95a5722d3014f4e2e844351 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Mon, 24 Jul 2023 16:48:24 -0700 Subject: [PATCH 12/12] `fn blend_h_rust`: Deduplicate calls. --- src/mc.rs | 3 ++- src/mc_tmpl_16.rs | 31 +++++-------------------------- src/mc_tmpl_8.rs | 31 +++++-------------------------- 3 files changed, 12 insertions(+), 53 deletions(-) diff --git a/src/mc.rs b/src/mc.rs index d788338a0..c097aa87d 100644 --- a/src/mc.rs +++ b/src/mc.rs @@ -805,7 +805,8 @@ pub unsafe fn blend_v_rust( } } -unsafe fn blend_h_rust( +// TODO(kkysen) temporarily `pub` until `mc` callers are deduplicated +pub unsafe fn blend_h_rust( mut dst: *mut BD::Pixel, dst_stride: usize, mut tmp: *const BD::Pixel, diff --git a/src/mc_tmpl_16.rs b/src/mc_tmpl_16.rs index 9c42f19bb..191de3ef8 100644 --- a/src/mc_tmpl_16.rs +++ b/src/mc_tmpl_16.rs @@ -1846,7 +1846,6 @@ extern "C" { } use crate::src::tables::dav1d_mc_warp_filter; -use crate::src::tables::dav1d_obmc_masks; use crate::src::tables::dav1d_resize_filter; pub type pixel = uint16_t; @@ -3157,35 +3156,15 @@ unsafe extern "C" fn blend_v_c( ) { blend_v_rust::(dst, dst_stride as usize, tmp, w as usize, h as usize) } +use crate::src::mc::blend_h_rust; unsafe extern "C" fn blend_h_c( - mut dst: *mut pixel, + dst: *mut pixel, dst_stride: ptrdiff_t, - mut tmp: *const pixel, + tmp: *const pixel, w: libc::c_int, - mut h: libc::c_int, + h: libc::c_int, ) { - let mut mask: *const uint8_t = - &*dav1d_obmc_masks.0.as_ptr().offset(h as isize) as *const uint8_t; - h = h * 3 >> 2; - loop { - let fresh0 = mask; - mask = mask.offset(1); - let m = *fresh0 as libc::c_int; - let mut x = 0; - while x < w { - *dst.offset(x as isize) = (*dst.offset(x as isize) as libc::c_int * (64 - m) - + *tmp.offset(x as isize) as libc::c_int * m - + 32 - >> 6) as pixel; - x += 1; - } - dst = dst.offset(PXSTRIDE(dst_stride) as isize); - tmp = tmp.offset(w as isize); - h -= 1; - if !(h != 0) { - break; - } - } + blend_h_rust::(dst, dst_stride as usize, tmp, w as usize, h as usize) } unsafe extern "C" fn w_mask_c( mut dst: *mut pixel, diff --git a/src/mc_tmpl_8.rs b/src/mc_tmpl_8.rs index ff97e52a2..9c3b9ae80 100644 --- a/src/mc_tmpl_8.rs +++ b/src/mc_tmpl_8.rs @@ -1817,7 +1817,6 @@ extern "C" { } use crate::src::tables::dav1d_mc_warp_filter; -use crate::src::tables::dav1d_obmc_masks; use crate::src::tables::dav1d_resize_filter; pub type pixel = uint8_t; @@ -3059,35 +3058,15 @@ unsafe extern "C" fn blend_v_c( ) { blend_v_rust::(dst, dst_stride as usize, tmp, w as usize, h as usize) } +use crate::src::mc::blend_h_rust; unsafe extern "C" fn blend_h_c( - mut dst: *mut pixel, + dst: *mut pixel, dst_stride: ptrdiff_t, - mut tmp: *const pixel, + tmp: *const pixel, w: libc::c_int, - mut h: libc::c_int, + h: libc::c_int, ) { - let mut mask: *const uint8_t = - &*dav1d_obmc_masks.0.as_ptr().offset(h as isize) as *const uint8_t; - h = h * 3 >> 2; - loop { - let fresh0 = mask; - mask = mask.offset(1); - let m = *fresh0 as libc::c_int; - let mut x = 0; - while x < w { - *dst.offset(x as isize) = (*dst.offset(x as isize) as libc::c_int * (64 - m) - + *tmp.offset(x as isize) as libc::c_int * m - + 32 - >> 6) as pixel; - x += 1; - } - dst = dst.offset(dst_stride as isize); - tmp = tmp.offset(w as isize); - h -= 1; - if !(h != 0) { - break; - } - } + blend_h_rust::(dst, dst_stride as usize, tmp, w as usize, h as usize) } unsafe extern "C" fn w_mask_c( mut dst: *mut pixel,