Skip to content

Commit

Permalink
Add a bunch of inline(always) for c_unwind
Browse files Browse the repository at this point in the history
  • Loading branch information
DianQK committed Feb 25, 2024
1 parent 351d48e commit a528944
Show file tree
Hide file tree
Showing 8 changed files with 58 additions and 8 deletions.
6 changes: 6 additions & 0 deletions src/float/conv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
///
/// The algorithm is explained here: <https://blog.m-ou.se/floats/>
mod int_to_float {
#[inline(always)]
pub fn u32_to_f32_bits(i: u32) -> u32 {
if i == 0 {
return 0;
Expand All @@ -17,6 +18,7 @@ mod int_to_float {
(e << 23) + m // + not |, so the mantissa can overflow into the exponent.
}

#[inline(always)]
pub fn u32_to_f64_bits(i: u32) -> u64 {
if i == 0 {
return 0;
Expand All @@ -27,6 +29,7 @@ mod int_to_float {
(e << 52) + m // Bit 53 of m will overflow into e.
}

#[inline(always)]
pub fn u64_to_f32_bits(i: u64) -> u32 {
let n = i.leading_zeros();
let y = i.wrapping_shl(n);
Expand All @@ -37,6 +40,7 @@ mod int_to_float {
(e << 23) + m // + not |, so the mantissa can overflow into the exponent.
}

#[inline(always)]
pub fn u64_to_f64_bits(i: u64) -> u64 {
if i == 0 {
return 0;
Expand All @@ -49,6 +53,7 @@ mod int_to_float {
(e << 52) + m // + not |, so the mantissa can overflow into the exponent.
}

#[inline(always)]
pub fn u128_to_f32_bits(i: u128) -> u32 {
let n = i.leading_zeros();
let y = i.wrapping_shl(n);
Expand All @@ -59,6 +64,7 @@ mod int_to_float {
(e << 23) + m // + not |, so the mantissa can overflow into the exponent.
}

#[inline(always)]
pub fn u128_to_f64_bits(i: u128) -> u64 {
let n = i.leading_zeros();
let y = i.wrapping_shl(n);
Expand Down
11 changes: 11 additions & 0 deletions src/float/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,41 +120,51 @@ macro_rules! float_impl {
const IMPLICIT_BIT: Self::Int = 1 << Self::SIGNIFICAND_BITS;
const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK);

/// Returns the raw bit pattern of this float as `Self::Int`, obtained from `to_bits()`.
#[inline(always)]
fn repr(self) -> Self::Int {
self.to_bits()
}
/// Returns the bit pattern of this float reinterpreted as the signed integer type
/// `Self::SignedInt` (an `as` cast of `to_bits()`).
#[inline(always)]
fn signed_repr(self) -> Self::SignedInt {
    let raw_bits = self.to_bits();
    raw_bits as Self::SignedInt
}
/// Compares two floats by representation.
///
/// Two NaNs always compare equal, whatever their bit patterns; otherwise the result is
/// a plain comparison of the `repr()` bits of both operands.
#[inline(always)]
fn eq_repr(self, rhs: Self) -> bool {
    // Short-circuits exactly like the branchy form: the bit comparison is only
    // evaluated when at least one side is not NaN.
    (self.is_nan() && rhs.is_nan()) || self.repr() == rhs.repr()
}
/// Returns `true` when the signed integer view of the bit pattern is below zero,
/// i.e. when the top bit of the representation is set.
#[inline(always)]
fn sign(self) -> bool {
    let as_signed = self.signed_repr();
    as_signed < Self::SignedInt::ZERO
}
/// Extracts the exponent field of the bit pattern.
///
/// The bits are masked with `EXPONENT_MASK`, shifted down by `SIGNIFICAND_BITS`, and
/// cast to `Self::ExpInt`. No bias adjustment is applied here.
#[inline(always)]
fn exp(self) -> Self::ExpInt {
    let exponent_in_place = self.to_bits() & Self::EXPONENT_MASK;
    let exponent_field = exponent_in_place >> Self::SIGNIFICAND_BITS;
    exponent_field as Self::ExpInt
}
/// Returns the significand field: the bit pattern masked with `SIGNIFICAND_MASK`.
#[inline(always)]
fn frac(self) -> Self::Int {
    let raw_bits = self.to_bits();
    raw_bits & Self::SIGNIFICAND_MASK
}
/// Returns the significand field with `IMPLICIT_BIT` (`1 << SIGNIFICAND_BITS`) OR-ed in.
#[inline(always)]
fn imp_frac(self) -> Self::Int {
    let stored_fraction = self.frac();
    stored_fraction | Self::IMPLICIT_BIT
}
/// Builds a float from the raw bit pattern `a` by forwarding to `from_bits`.
#[inline(always)]
fn from_repr(a: Self::Int) -> Self {
Self::from_bits(a)
}
/// Assembles a float from its three fields.
///
/// * `sign` is placed in the top bit (`BITS - 1`).
/// * `exponent` is shifted up by `SIGNIFICAND_BITS` and masked with `EXPONENT_MASK`.
/// * `significand` is masked with `SIGNIFICAND_MASK`.
///
/// The masked fields are OR-ed together and converted with `from_repr`.
#[inline(always)]
fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self {
    let sign_field = (sign as Self::Int) << (Self::BITS - 1);
    let exponent_field = (exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK;
    let significand_field = significand & Self::SIGNIFICAND_MASK;
    Self::from_repr(sign_field | exponent_field | significand_field)
}
#[inline(always)]
fn normalize(significand: Self::Int) -> (i32, Self::Int) {
let shift = significand
.leading_zeros()
Expand All @@ -164,6 +174,7 @@ macro_rules! float_impl {
significand << shift as Self::Int,
)
}
/// Returns `true` when every exponent bit of the representation is zero.
///
/// Note that this check looks only at the exponent field, so a value whose bits are
/// all zero also satisfies it.
#[inline(always)]
fn is_subnormal(self) -> bool {
    let exponent_bits = self.repr() & Self::EXPONENT_MASK;
    exponent_bits == Self::Int::ZERO
}
Expand Down
12 changes: 12 additions & 0 deletions src/int/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,50 +151,62 @@ macro_rules! int_impl_common {
}
};

/// Converts a `bool` to this integer type with an `as` cast (`false` -> 0, `true` -> 1).
#[inline(always)]
fn from_bool(b: bool) -> Self {
b as $ty
}

/// Shifts right on the unsigned view of `self`.
///
/// The value is converted with `unsigned()`, shifted with `wrapping_shr(other)`, and
/// converted back with `from_unsigned`.
#[inline(always)]
fn logical_shr(self, other: u32) -> Self {
    let shifted = self.unsigned().wrapping_shr(other);
    Self::from_unsigned(shifted)
}

/// Returns `true` when `self` equals `Self::ZERO`.
#[inline(always)]
fn is_zero(self) -> bool {
    Self::ZERO == self
}

/// Forwards to `<Self>::wrapping_neg`.
// NOTE(review): the `<Self>::` path presumably selects the primitive's inherent method
// rather than this trait method of the same name — confirm before rewriting the call.
#[inline(always)]
fn wrapping_neg(self) -> Self {
<Self>::wrapping_neg(self)
}

/// Forwards to `<Self>::wrapping_add` with `other` as the right-hand operand.
// NOTE(review): the `<Self>::` path presumably selects the primitive's inherent method
// rather than this trait method of the same name — confirm before rewriting the call.
#[inline(always)]
fn wrapping_add(self, other: Self) -> Self {
<Self>::wrapping_add(self, other)
}

/// Forwards to `<Self>::wrapping_mul` with `other` as the right-hand operand.
// NOTE(review): the `<Self>::` path presumably selects the primitive's inherent method
// rather than this trait method of the same name — confirm before rewriting the call.
#[inline(always)]
fn wrapping_mul(self, other: Self) -> Self {
<Self>::wrapping_mul(self, other)
}

/// Forwards to `<Self>::wrapping_sub` with `other` as the right-hand operand.
// NOTE(review): the `<Self>::` path presumably selects the primitive's inherent method
// rather than this trait method of the same name — confirm before rewriting the call.
#[inline(always)]
fn wrapping_sub(self, other: Self) -> Self {
<Self>::wrapping_sub(self, other)
}

/// Forwards to `<Self>::wrapping_shl`, shifting left by `other` bits.
// NOTE(review): the `<Self>::` path presumably selects the primitive's inherent method
// rather than this trait method of the same name — confirm before rewriting the call.
#[inline(always)]
fn wrapping_shl(self, other: u32) -> Self {
<Self>::wrapping_shl(self, other)
}

/// Forwards to `<Self>::wrapping_shr`, shifting right by `other` bits.
// NOTE(review): the `<Self>::` path presumably selects the primitive's inherent method
// rather than this trait method of the same name — confirm before rewriting the call.
#[inline(always)]
fn wrapping_shr(self, other: u32) -> Self {
<Self>::wrapping_shr(self, other)
}

/// Forwards to `<Self>::rotate_left`, rotating by `other` bits.
// NOTE(review): the `<Self>::` path presumably selects the primitive's inherent method
// rather than this trait method of the same name — confirm before rewriting the call.
#[inline(always)]
fn rotate_left(self, other: u32) -> Self {
<Self>::rotate_left(self, other)
}

/// Forwards to `<Self>::overflowing_add`, returning its `(Self, bool)` pair unchanged.
// NOTE(review): the `<Self>::` path presumably selects the primitive's inherent method
// rather than this trait method of the same name — confirm before rewriting the call.
#[inline(always)]
fn overflowing_add(self, other: Self) -> (Self, bool) {
<Self>::overflowing_add(self, other)
}

/// Forwards to `<Self>::leading_zeros`.
// NOTE(review): the `<Self>::` path presumably selects the primitive's inherent method
// rather than this trait method of the same name — confirm before rewriting the call.
#[inline(always)]
fn leading_zeros(self) -> u32 {
<Self>::leading_zeros(self)
}
Expand Down
4 changes: 4 additions & 0 deletions src/int/specialized_div_rem/asymmetric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,13 @@ macro_rules! impl_asymmetric {
$uH:ident, // unsigned integer with half the bit width of $uX
$uX:ident, // unsigned integer with half the bit width of $uD
$uD:ident // unsigned integer type for the inputs and outputs of `$fn`
$(, $fun_attr:meta)* // attributes for the function
) => {
/// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
/// tuple.
$(
#[$fun_attr]
)*
pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) {
let n: u32 = $n_h * 2;

Expand Down
4 changes: 4 additions & 0 deletions src/int/specialized_div_rem/delegate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,13 @@ macro_rules! impl_delegate {
$uX:ident, // unsigned integer with half the bit width of $uD.
$uD:ident, // unsigned integer type for the inputs and outputs of `$fn`
$iD:ident // signed integer type with the same bitwidth as `$uD`
$(, $fun_attr:meta)* // attributes for the function
) => {
/// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
/// tuple.
$(
#[$fun_attr]
)*
pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) {
// The two possibility algorithm, undersubtracting long division algorithm, or any kind
// of reciprocal based algorithm will not be fastest, because they involve large
Expand Down
24 changes: 16 additions & 8 deletions src/int/specialized_div_rem/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,15 +110,17 @@ impl_normalization_shift!(
32,
u32,
i32,
allow(dead_code)
allow(dead_code),
inline(always)
);
impl_normalization_shift!(
u64_normalization_shift,
USE_LZ,
64,
u64,
i64,
allow(dead_code)
allow(dead_code),
inline(always)
);

/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
Expand Down Expand Up @@ -149,7 +151,8 @@ impl_trifecta!(
32,
u32,
u64,
u128
u128,
inline(always)
);

// If the pointer width is less than 64, then the target architecture almost certainly does not have
Expand All @@ -168,7 +171,8 @@ impl_delegate!(
u32,
u64,
u128,
i128
i128,
inline(always)
);

/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
Expand Down Expand Up @@ -209,7 +213,8 @@ impl_asymmetric!(
32,
u32,
u64,
u128
u128,
inline(always)
);

/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
Expand Down Expand Up @@ -255,7 +260,8 @@ impl_binary_long!(
u64_normalization_shift,
64,
u64,
i64
i64,
inline(always)
);

/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
Expand Down Expand Up @@ -296,7 +302,8 @@ impl_asymmetric!(
16,
u16,
u32,
u64
u64,
inline(always)
);

// 32 bits is the smallest division used by `compiler-builtins`, so we end with binary long division
Expand All @@ -307,5 +314,6 @@ impl_binary_long!(
32,
u32,
i32,
allow(dead_code)
allow(dead_code),
inline(always)
);
4 changes: 4 additions & 0 deletions src/int/specialized_div_rem/trifecta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@ macro_rules! impl_trifecta {
$uH:ident, // unsigned integer with half the bit width of $uX
$uX:ident, // unsigned integer with half the bit width of $uD
$uD:ident // unsigned integer type for the inputs and outputs of `$unsigned_name`
$(, $fun_attr:meta)* // attributes for the function
) => {
/// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
/// tuple.
$(
#[$fun_attr]
)*
pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) {
// This is called the trifecta algorithm because it uses three main algorithms: short
// division for small divisors, the two possibility algorithm for large divisors, and an
Expand Down
1 change: 1 addition & 0 deletions src/mem/x86_64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize {
}

/// Determine optimal parameters for a `rep` instruction.
#[inline(always)]
fn rep_param(dest: *mut u8, mut count: usize) -> (usize, usize, usize) {
// Unaligned writes are still slow on modern processors, so align the destination address.
let pre_byte_count = ((8 - (dest as usize & 0b111)) & 0b111).min(count);
Expand Down

0 comments on commit a528944

Please sign in to comment.