Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
tgross35 committed Sep 26, 2024
1 parent a5277c0 commit d1aa791
Showing 1 changed file with 37 additions and 22 deletions.
59 changes: 37 additions & 22 deletions src/float/conv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,33 @@ use super::Float;
/// - Calculate a base mantissa by shifting the integer into mantissa position
/// - Figure out if rounding needs to occur by classifying truncated bits. Some patterns apply
/// here, so they may be "squashed" into smaller numbers to simplify the classification.
///
/// # Terminology
///
/// - `i`: the original integer
/// - `i_m`: the integer, shifted fully left (no leading zeros)
/// - `n`: number of leading zeros
/// - `e`: the resulting exponent
/// - `m`: the resulting mantissa
/// - `m_base`: the mantissa before adjusting for truncated bits
mod int_to_float {
use super::*;

/// Calculate the exponent from the number of leading zeros.
fn exp<I: Int, F: Float<Int: CastFrom<u32>>>(n: u32) -> F::Int {
F::Int::cast_from(I::BITS + F::EXPONENT_BIAS - 2 - n)
F::Int::cast_from(F::EXPONENT_BIAS - 1 + I::BITS - n)
}

/// Shift the integer into the float's mantissa bits. Keep the lowest exponent bit intact.
fn m_base<I: Int, F: Float<Int: CastFrom<I>>>(i_m: I) -> F::Int {
/// Calculate the mantissa in cases where the float size is less than integer size. An
/// adjustment of the final mantissa will be needed, but it is calculated separately.
fn m_f_lt_i<I: Int, F: Float<Int: CastFrom<I>>>(i_m: I) -> F::Int {
// `i_m` already has no leading zeros. Just shift it into the float's mantissa bits,
// retaining the highest bits.
F::Int::cast_from(i_m >> ((I::BITS - F::BITS) + F::EXPONENT_BITS))
}

/// Calculate the mantissa in cases where the float size is greater than integer size.
///
/// `i` is the integer being converted and `n` is its number of leading zeros. The integer is
/// cast to the float's integer representation and shifted left; for a nonzero `i`, this places
/// its highest set bit at bit index `F::SIGNIFICAND_BITS`.
fn m_f_gt_i<I: Int, F: Float<Int: CastFrom<I>>>(i: I, n: u32) -> F::Int {
// The highest set bit of a nonzero `i` is at index `I::BITS - 1 - n`; adding the shift amount
// below to that index gives `F::SIGNIFICAND_BITS`.
F::Int::cast_from(i) << (F::SIGNIFICAND_BITS - I::BITS + 1 + n)
}

/// Calculate the mantissa and a dropped bit adjustment when `f` and `i` are equal sizes
/// Calculate the mantissa and a dropped bit adjustment when `f` and `i` are equal sizes.
/// Returns the mantissa and necessary adjustment.
fn m_f_eq_i<I: Int + CastInto<F::Int>, F: Float<Int = I>>(i: I, n: u32) -> (F::Int, F::Int) {
let base = (i << n) >> F::EXPONENT_BITS;

Expand All @@ -43,16 +51,23 @@ mod int_to_float {
(base, adj)
}

/// Calculate the mantissa in cases where the float size is greater than integer size
fn m_f_gt_i<I: Int, F: Float<Int: CastFrom<I>>>(i: I, n: u32) -> F::Int {
F::Int::cast_from(i) << (F::SIGNIFICAND_BITS - I::BITS + 1 + n)
}

/// Adjust a mantissa with dropped bits
fn m_adj<F: Float>(m_base: F::Int, dropped_bits: F::Int) -> F::Int {
fn m_adj<F: Float<Int: CastInto<i32>>>(m_base: F::Int, dropped_bits: F::Int) -> F::Int {
// fn m_adj<F: Float>(m_base: F::Int, dropped_bits: F::Int) -> F::Int {
// Branchlessly extract a `1` if rounding up should happen
let adj = (dropped_bits - (dropped_bits >> (F::BITS - 1) & !m_base)) >> (F::BITS - 1);

// Add one when we need to round up. Break ties to even.
m_base + adj
}

/// Combine a final float repr from an exponent and mantissa.
/// Shift the exponent to its position and add the mantissa. Allows adjusting an off by one
/// exponent with an overflowing mantissa.
fn repr<F: Float>(e: F::Int, m: F::Int) -> F::Int {
// + rather than | so the mantissa can overflow into the exponent
(e << F::SIGNIFICAND_BITS) + m
Expand All @@ -77,7 +92,7 @@ mod int_to_float {
let n = i.leading_zeros();
let (m_base, adj) = m_f_eq_i::<u32, f32>(i, n);
let m = m_adj::<f32>(m_base, adj);
let e = exp::<u32, f32>(n);
let e = exp::<u32, f32>(n) - 1;
repr::<f32>(e, m)
}

Expand All @@ -87,7 +102,7 @@ mod int_to_float {
}
let n = i.leading_zeros();
let m = m_f_gt_i::<_, f64>(i, n);
let e = exp::<u32, f64>(n);
let e = exp::<u32, f64>(n) - 1;
repr::<f64>(e, m)
}

Expand All @@ -111,12 +126,12 @@ mod int_to_float {
pub fn u64_to_f32_bits(i: u64) -> u32 {
let n = i.leading_zeros();
let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
let m_base = m_base::<_, f32>(i_m);
let m_base = m_f_lt_i::<u64, f32>(i_m);
// The entire lower half of `i` will be truncated (masked portion), plus the
// next `EXPONENT_BITS` bits.
let adj = (i_m >> f32::EXPONENT_BITS | i_m & 0xFFFF) as u32;
let m = m_adj::<f32>(m_base, adj);
let e = if i == 0 { 0 } else { exp::<u64, f32>(n) };
let e = if i == 0 { 0 } else { exp::<u64, f32>(n) - 1 };
repr::<f32>(e, m)
}

Expand All @@ -127,7 +142,7 @@ mod int_to_float {
let n = i.leading_zeros();
let (m_base, adj) = m_f_eq_i::<u64, f64>(i, n);
let m = m_adj::<f64>(m_base, adj);
let e = exp::<u64, f64>(n);
let e = exp::<u64, f64>(n) - 1;
repr::<f64>(e, m)
}

Expand All @@ -138,14 +153,14 @@ mod int_to_float {
}
let n = i.leading_zeros();
let m = m_f_gt_i::<_, f128>(i, n);
let e = exp::<u64, f128>(n);
let e = exp::<u64, f128>(n) - 1;
repr::<f128>(e, m)
}

pub fn u128_to_f32_bits(i: u128) -> u32 {
let n = i.leading_zeros();
let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
let m_base = m_base::<_, f32>(i_m);
let m_base = m_f_lt_i::<u128, f32>(i_m);

// Within the upper `F::BITS`, everything except for the significand
// gets truncated
Expand All @@ -157,19 +172,19 @@ mod int_to_float {
let adj = d1 | d2;

let m = m_adj::<f32>(m_base, adj);
let e = if i == 0 { 0 } else { exp::<u128, f32>(n) };
let e = if i == 0 { 0 } else { exp::<u128, f32>(n) - 1 };
repr::<f32>(e, m)
}

pub fn u128_to_f64_bits(i: u128) -> u64 {
let n = i.leading_zeros();
let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
let m_base = m_base::<_, f64>(i_m);
let m_base = m_f_lt_i::<u128, f64>(i_m);
// The entire lower half of `i` will be truncated (masked portion), plus the
// next `EXPONENT_BITS` bits.
let adj = (i_m >> f64::EXPONENT_BITS | i_m & 0xFFFF_FFFF) as u64;
let m = m_adj::<f64>(m_base, adj);
let e = if i == 0 { 0 } else { exp::<u128, f64>(n) };
let e = if i == 0 { 0 } else { exp::<u128, f64>(n) - 1 };
repr::<f64>(e, m)
}

Expand All @@ -181,7 +196,7 @@ mod int_to_float {
let n = i.leading_zeros();
let (m_base, adj) = m_f_eq_i::<u128, f128>(i, n);
let m = m_adj::<f128>(m_base, adj);
let e = exp::<u128, f128>(n);
let e = exp::<u128, f128>(n) - 1;
repr::<f128>(e, m)
}
}
Expand Down

0 comments on commit d1aa791

Please sign in to comment.