Skip to content

Commit

Permalink
fn fguv 32x32xn_rust: Elide val bounds checks, fixing a perf regr…
Browse files Browse the repository at this point in the history
…ession (#560)

This elides bounds checks from indexing with `val` by clipping the index
to a valid one. Initially, I copied #558's approach, which uses
`cmp::min(val, scaling.as_ref().len() - 1)`, and this removes 3 bounds
checks. But since the scaling size is a `const` and is `1 <<
BD::SCALING_BITS`, we can just truncate the other bits with `&`, which
avoids a `csel` and 2 more bounds checks.
  • Loading branch information
kkysen authored Nov 11, 2023
2 parents 67c8f51 + d51c636 commit 0665de6
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 4 deletions.
7 changes: 4 additions & 3 deletions include/common/bitdepth.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ pub trait BitDepth: Clone + Copy {
+ ToPrimitive<c_int>;

type Scaling: AsRef<[u8]> + AsMut<[u8]> + ArrayDefault + Copy;
const SCALING_SIZE: usize;
const SCALING_BITS: usize;
const SCALING_SIZE: usize = 1 << Self::SCALING_BITS;

type BitDepthMax;

Expand Down Expand Up @@ -206,7 +207,7 @@ impl BitDepth for BitDepth8 {
type Entry = i8;

type Scaling = [u8; Self::SCALING_SIZE];
const SCALING_SIZE: usize = 1 << 8;
const SCALING_BITS: usize = 8;

type BitDepthMax = ();

Expand Down Expand Up @@ -287,7 +288,7 @@ impl BitDepth for BitDepth16 {
type Entry = i16;

type Scaling = [u8; Self::SCALING_SIZE];
const SCALING_SIZE: usize = 1 << 12;
const SCALING_BITS: usize = 12;

type BitDepthMax = Self::Pixel;

Expand Down
4 changes: 3 additions & 1 deletion src/filmgrain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -792,8 +792,10 @@ unsafe fn fguv_32x32xn_rust<BD: BitDepth>(
bd.bitdepth_max().as_::<c_int>(),
);
}
// `val` isn't out of bounds, so we can
// eliminate extra panicking code by bit-truncating `val`.
let noise = round2(
scaling.as_ref()[val as usize] as c_int * grain,
scaling.as_ref()[val as usize % scaling.as_ref().len()] as c_int * grain,
data.scaling_shift,
);
*dst = iclip((*src).as_::<c_int>() + noise, min_value, max_value).as_::<BD::Pixel>();
Expand Down

0 comments on commit 0665de6

Please sign in to comment.