From ff542e86f3d0045240dac379ed50d6dccedf7987 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 9 Nov 2023 14:35:43 -0800 Subject: [PATCH] `fn fguv_32x32xn_*`: Make `{uv,is_uv,uv_pl}` args `bool`s, casting to `c_int`/`usize` for FFI/indexing. By making the arg a `bool` in `fn fguv_32x32xn_rust` and doing the `as usize` cast inside the `fn`[1], (4[2]) bounds checks of `uv` into length-2 arrays are eliminated, fixing a perf regression. This was checked on `aarch64-unknown-linux-gnu` asm. [1] Doing the `bool` to `usize` cast inside of the `add_noise_uv` closure didn't make a difference in the number of bounds checks. [2] 27 => 23 `bl core::panicking::panic_bounds_check`s --- src/fg_apply.rs | 4 ++-- src/filmgrain.rs | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/fg_apply.rs b/src/fg_apply.rs index ae10a1565..41844d55f 100644 --- a/src/fg_apply.rs +++ b/src/fg_apply.rs @@ -238,7 +238,7 @@ pub(crate) unsafe fn rav1d_apply_grain_row( row, luma_src, r#in.stride[0], - pl, + pl != 0, is_id, bd, ); @@ -258,7 +258,7 @@ pub(crate) unsafe fn rav1d_apply_grain_row( row, luma_src, r#in.stride[0], - pl, + pl != 0, is_id, bd, ); diff --git a/src/filmgrain.rs b/src/filmgrain.rs index 4603c305f..fc7648324 100644 --- a/src/filmgrain.rs +++ b/src/filmgrain.rs @@ -153,7 +153,7 @@ impl FnFGUV32x32xN { row_num: usize, luma_row: *const BD::Pixel, luma_stride: ptrdiff_t, - uv_pl: usize, + is_uv: bool, is_id: bool, bd: BD, ) { @@ -165,7 +165,7 @@ impl FnFGUV32x32xN { let bh = bh as c_int; let row_num = row_num as c_int; let luma_row = luma_row.cast(); - let uv_pl = uv_pl as c_int; + let uv_pl = is_uv as c_int; let is_id = is_id as c_int; let bd = bd.into_c(); (self.get())( @@ -706,13 +706,13 @@ unsafe fn fguv_32x32xn_rust( row_num: usize, luma_row: *const BD::Pixel, luma_stride: ptrdiff_t, - uv: usize, + is_uv: bool, is_id: bool, is_sx: bool, is_sy: bool, bd: BD, ) { - let [sx, sy] = [is_sx, is_sy].map(|it| it as usize); + let [uv, sx, sy] = [is_uv, is_sx, is_sy].map(|it| it as usize); let rows = 1 + (data.overlap_flag && row_num > 0) as usize; let bitdepth_min_8 = bd.bitdepth() - 8; @@ -894,7 +894,7 @@ unsafe extern "C" fn fguv_32x32xn_c_erased< row_num, luma_row, luma_stride, - uv_pl, + uv_pl != 0, is_id, IS_SX, IS_SY,