diff --git a/README.md b/README.md index 985020f5..bb04f153 100644 --- a/README.md +++ b/README.md @@ -233,12 +233,12 @@ of being added to Rust. - [x] fixunstfdi.c - [x] fixunstfsi.c - [x] fixunstfti.c -- [ ] floatditf.c -- [ ] floatsitf.c -- [ ] floattitf.c -- [ ] floatunditf.c -- [ ] floatunsitf.c -- [ ] floatuntitf.c +- [x] floatditf.c +- [x] floatsitf.c +- [x] floattitf.c +- [x] floatunditf.c +- [x] floatunsitf.c +- [x] floatuntitf.c - [x] multf3.c - [x] powitf2.c - [x] subtf3.c diff --git a/build.rs b/build.rs index df98688d..f7868f74 100644 --- a/build.rs +++ b/build.rs @@ -522,10 +522,6 @@ mod c { if (target.arch == "aarch64" || target.arch == "arm64ec") && consider_float_intrinsics { sources.extend(&[ ("__comparetf2", "comparetf2.c"), - ("__floatditf", "floatditf.c"), - ("__floatsitf", "floatsitf.c"), - ("__floatunditf", "floatunditf.c"), - ("__floatunsitf", "floatunsitf.c"), ("__fe_getround", "fp_mode.c"), ("__fe_raise_inexact", "fp_mode.c"), ]); @@ -540,21 +536,11 @@ mod c { } if target.arch == "mips64" { - sources.extend(&[ - ("__netf2", "comparetf2.c"), - ("__floatsitf", "floatsitf.c"), - ("__floatunsitf", "floatunsitf.c"), - ("__fe_getround", "fp_mode.c"), - ]); + sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]); } if target.arch == "loongarch64" { - sources.extend(&[ - ("__netf2", "comparetf2.c"), - ("__floatsitf", "floatsitf.c"), - ("__floatunsitf", "floatunsitf.c"), - ("__fe_getround", "fp_mode.c"), - ]); + sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]); } // Remove the assembly implementations that won't compile for the target diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs index 06d77233..368da6af 100644 --- a/examples/intrinsics.rs +++ b/examples/intrinsics.rs @@ -264,14 +264,18 @@ mod intrinsics { /* i32 operations */ + // floatsisf + pub fn aeabi_i2f(x: i32) -> f32 { + x as f32 + } + // floatsidf pub fn aeabi_i2d(x: i32) -> f64 { x as f64 } - // floatsisf - pub fn 
aeabi_i2f(x: i32) -> f32 { - x as f32 + pub fn floatsitf(x: i32) -> f128 { + x as f128 } pub fn aeabi_idiv(a: i32, b: i32) -> i32 { @@ -294,6 +298,10 @@ mod intrinsics { x as f64 } + pub fn floatditf(x: i64) -> f128 { + x as f128 + } + pub fn mulodi4(a: i64, b: i64) -> i64 { a * b } @@ -314,6 +322,18 @@ mod intrinsics { /* i128 operations */ + pub fn floattisf(x: i128) -> f32 { + x as f32 + } + + pub fn floattidf(x: i128) -> f64 { + x as f64 + } + + pub fn floattitf(x: i128) -> f128 { + x as f128 + } + pub fn lshrti3(a: i128, b: usize) -> i128 { a >> b } @@ -328,14 +348,18 @@ mod intrinsics { /* u32 operations */ + // floatunsisf + pub fn aeabi_ui2f(x: u32) -> f32 { + x as f32 + } + // floatunsidf pub fn aeabi_ui2d(x: u32) -> f64 { x as f64 } - // floatunsisf - pub fn aeabi_ui2f(x: u32) -> f32 { - x as f32 + pub fn floatunsitf(x: u32) -> f128 { + x as f128 } pub fn aeabi_uidiv(a: u32, b: u32) -> u32 { @@ -358,6 +382,10 @@ mod intrinsics { x as f64 } + pub fn floatunditf(x: u64) -> f128 { + x as f128 + } + // udivdi3 pub fn aeabi_uldivmod(a: u64, b: u64) -> u64 { a * b @@ -369,6 +397,18 @@ mod intrinsics { /* u128 operations */ + pub fn floatuntisf(x: u128) -> f32 { + x as f32 + } + + pub fn floatuntidf(x: u128) -> f64 { + x as f64 + } + + pub fn floatuntitf(x: u128) -> f128 { + x as f128 + } + pub fn muloti4(a: u128, b: u128) -> Option { a.checked_mul(b) } @@ -466,6 +506,16 @@ fn run() { bb(fixunstfsi(bb(2.))); #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] bb(fixunstfti(bb(2.))); + bb(floatditf(bb(2))); + bb(floatsitf(bb(2))); + bb(floattidf(bb(2))); + bb(floattisf(bb(2))); + bb(floattitf(bb(2))); + bb(floatunditf(bb(2))); + bb(floatunsitf(bb(2))); + bb(floatuntidf(bb(2))); + bb(floatuntisf(bb(2))); + bb(floatuntitf(bb(2))); bb(gttf(bb(2.), bb(2.))); bb(lshrti3(bb(2), bb(2))); bb(lttf(bb(2.), bb(2.))); diff --git a/src/float/conv.rs b/src/float/conv.rs index 22ff9912..eefb0573 100644 --- a/src/float/conv.rs +++ b/src/float/conv.rs @@ -104,6 
+104,24 @@ mod int_to_float { repr::(e, m) } + #[cfg(f128_enabled)] + pub fn u32_to_f128_bits(i: u32) -> u128 { + if i == 0 { + return 0; + } + let n = i.leading_zeros(); + + // Shift into mantissa position that is correct for the type, but shifted into the lower + // 64 bits so that we can avoid 128-bit math. + let m = (i as u64) << (shift_f_gt_i::(n) - 64); + let e = exp::(n) as u64 - 1; + // High 64 bits of f128 representation. + let h = (e << (f128::SIGNIFICAND_BITS - 64)) + m; + + // Shift back to the high bits, the rest of the mantissa will always be 0. + (h as u128) << 64 + } + pub fn u64_to_f32_bits(i: u64) -> u32 { let n = i.leading_zeros(); let i_m = i.wrapping_shl(n); @@ -130,6 +148,18 @@ mod int_to_float { repr::(e, m) } + #[cfg(f128_enabled)] + pub fn u64_to_f128_bits(i: u64) -> u128 { + if i == 0 { + return 0; + } + let n = i.leading_zeros(); + // Mantissa with implicit bit set + let m = (i as u128) << shift_f_gt_i::(n); + let e = exp::(n) - 1; + repr::(e, m) + } + pub fn u128_to_f32_bits(i: u128) -> u32 { let n = i.leading_zeros(); let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero @@ -162,6 +192,20 @@ mod int_to_float { let e = if i == 0 { 0 } else { exp::(n) - 1 }; repr::(e, m) } + + #[cfg(f128_enabled)] + pub fn u128_to_f128_bits(i: u128) -> u128 { + if i == 0 { + return 0; + } + let n = i.leading_zeros(); + // Mantissa with implicit bit set + let m_base = (i << n) >> f128::EXPONENT_BITS; + let adj = (i << n) << (f128::SIGNIFICAND_BITS + 1); + let m = m_adj::(m_base, adj); + let e = exp::(n) - 1; + repr::(e, m) + } } // Conversions from unsigned integers to floats. @@ -195,6 +239,24 @@ intrinsics! 
{ pub extern "C" fn __floatuntidf(i: u128) -> f64 { f64::from_bits(int_to_float::u128_to_f64_bits(i)) } + + #[ppc_alias = __floatunsikf] + #[cfg(f128_enabled)] + pub extern "C" fn __floatunsitf(i: u32) -> f128 { + f128::from_bits(int_to_float::u32_to_f128_bits(i)) + } + + #[ppc_alias = __floatundikf] + #[cfg(f128_enabled)] + pub extern "C" fn __floatunditf(i: u64) -> f128 { + f128::from_bits(int_to_float::u64_to_f128_bits(i)) + } + + #[ppc_alias = __floatuntikf] + #[cfg(f128_enabled)] + pub extern "C" fn __floatuntitf(i: u128) -> f128 { + f128::from_bits(int_to_float::u128_to_f128_bits(i)) + } } // Conversions from signed integers to floats. @@ -228,6 +290,24 @@ intrinsics! { pub extern "C" fn __floattidf(i: i128) -> f64 { int_to_float::signed(i, int_to_float::u128_to_f64_bits) } + + #[ppc_alias = __floatsikf] + #[cfg(f128_enabled)] + pub extern "C" fn __floatsitf(i: i32) -> f128 { + int_to_float::signed(i, int_to_float::u32_to_f128_bits) + } + + #[ppc_alias = __floatdikf] + #[cfg(f128_enabled)] + pub extern "C" fn __floatditf(i: i64) -> f128 { + int_to_float::signed(i, int_to_float::u64_to_f128_bits) + } + + #[ppc_alias = __floattikf] + #[cfg(f128_enabled)] + pub extern "C" fn __floattitf(i: i128) -> f128 { + int_to_float::signed(i, int_to_float::u128_to_f128_bits) + } } /// Generic float to unsigned int conversions. diff --git a/testcrate/benches/float_conv.rs b/testcrate/benches/float_conv.rs index de2043b0..0625a1ae 100644 --- a/testcrate/benches/float_conv.rs +++ b/testcrate/benches/float_conv.rs @@ -1,7 +1,8 @@ #![allow(improper_ctypes)] +#![cfg_attr(f128_enabled, feature(f128))] use compiler_builtins::float::conv; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{criterion_main, Criterion}; use testcrate::float_bench; /* unsigned int -> float */ @@ -76,6 +77,18 @@ float_bench! { ], } +#[cfg(f128_enabled)] +float_bench! 
{ + name: conv_u32_f128, + sig: (a: u32) -> f128, + crate_fn: conv::__floatunsitf, + crate_fn_ppc: conv::__floatunsikf, + sys_fn: __floatunsitf, + sys_fn_ppc: __floatunsikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + float_bench! { name: conv_u64_f32, sig: (a: u64) -> f32, @@ -118,6 +131,18 @@ float_bench! { ], } +#[cfg(f128_enabled)] +float_bench! { + name: conv_u64_f128, + sig: (a: u64) -> f128, + crate_fn: conv::__floatunditf, + crate_fn_ppc: conv::__floatundikf, + sys_fn: __floatunditf, + sys_fn_ppc: __floatundikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + float_bench! { name: conv_u128_f32, sig: (a: u128) -> f32, @@ -136,6 +161,18 @@ float_bench! { asm: [] } +#[cfg(f128_enabled)] +float_bench! { + name: conv_u128_f128, + sig: (a: u128) -> f128, + crate_fn: conv::__floatuntitf, + crate_fn_ppc: conv::__floatuntikf, + sys_fn: __floatuntitf, + sys_fn_ppc: __floatuntikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + /* signed int -> float */ float_bench! { @@ -205,6 +242,18 @@ float_bench! { ], } +#[cfg(f128_enabled)] +float_bench! { + name: conv_i32_f128, + sig: (a: i32) -> f128, + crate_fn: conv::__floatsitf, + crate_fn_ppc: conv::__floatsikf, + sys_fn: __floatsitf, + sys_fn_ppc: __floatsikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + float_bench! { name: conv_i64_f32, sig: (a: i64) -> f32, @@ -272,6 +321,18 @@ float_bench! { ], } +#[cfg(f128_enabled)] +float_bench! { + name: conv_i64_f128, + sig: (a: i64) -> f128, + crate_fn: conv::__floatditf, + crate_fn_ppc: conv::__floatdikf, + sys_fn: __floatditf, + sys_fn_ppc: __floatdikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + float_bench! { name: conv_i128_f32, sig: (a: i128) -> f32, @@ -290,6 +351,18 @@ float_bench! { asm: [] } +#[cfg(f128_enabled)] +float_bench! 
{ + name: conv_i128_f128, + sig: (a: i128) -> f128, + crate_fn: conv::__floattitf, + crate_fn_ppc: conv::__floattikf, + sys_fn: __floattitf, + sys_fn_ppc: __floattikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + /* float -> unsigned int */ #[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] @@ -397,6 +470,39 @@ float_bench! { asm: [] } +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_u32, + sig: (a: f128) -> u32, + crate_fn: conv::__fixunstfsi, + crate_fn_ppc: conv::__fixunskfsi, + sys_fn: __fixunstfsi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_u64, + sig: (a: f128) -> u64, + crate_fn: conv::__fixunstfdi, + crate_fn_ppc: conv::__fixunskfdi, + sys_fn: __fixunstfdi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_u128, + sig: (a: f128) -> u128, + crate_fn: conv::__fixunstfti, + crate_fn_ppc: conv::__fixunskfti, + sys_fn: __fixunstfti, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + /* float -> signed int */ #[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] @@ -504,43 +610,79 @@ float_bench! 
{ asm: [] } -criterion_group!( - float_conv, - conv_u32_f32, - conv_u32_f64, - conv_u64_f32, - conv_u64_f64, - conv_u128_f32, - conv_u128_f64, - conv_i32_f32, - conv_i32_f64, - conv_i64_f32, - conv_i64_f64, - conv_i128_f32, - conv_i128_f64, - conv_f64_u32, - conv_f64_u64, - conv_f64_u128, - conv_f64_i32, - conv_f64_i64, - conv_f64_i128, -); - -// FIXME: ppc64le has a sporadic overflow panic in the crate functions -// -#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] -criterion_group!( - float_conv_not_ppc64le, - conv_f32_u32, - conv_f32_u64, - conv_f32_u128, - conv_f32_i32, - conv_f32_i64, - conv_f32_i128, -); - -#[cfg(all(target_arch = "powerpc64", target_endian = "little"))] -criterion_main!(float_conv); +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_i32, + sig: (a: f128) -> i32, + crate_fn: conv::__fixtfsi, + crate_fn_ppc: conv::__fixkfsi, + sys_fn: __fixtfsi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} -#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] -criterion_main!(float_conv, float_conv_not_ppc64le); +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_i64, + sig: (a: f128) -> i64, + crate_fn: conv::__fixtfdi, + crate_fn_ppc: conv::__fixkfdi, + sys_fn: __fixtfdi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! 
{ + name: conv_f128_i128, + sig: (a: f128) -> i128, + crate_fn: conv::__fixtfti, + crate_fn_ppc: conv::__fixkfti, + sys_fn: __fixtfti, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +pub fn float_conv() { + let mut criterion = Criterion::default().configure_from_args(); + + conv_u32_f32(&mut criterion); + conv_u32_f64(&mut criterion); + conv_u64_f32(&mut criterion); + conv_u64_f64(&mut criterion); + conv_u128_f32(&mut criterion); + conv_u128_f64(&mut criterion); + conv_i32_f32(&mut criterion); + conv_i32_f64(&mut criterion); + conv_i64_f32(&mut criterion); + conv_i64_f64(&mut criterion); + conv_i128_f32(&mut criterion); + conv_i128_f64(&mut criterion); + conv_f64_u32(&mut criterion); + conv_f64_u64(&mut criterion); + conv_f64_u128(&mut criterion); + conv_f64_i32(&mut criterion); + conv_f64_i64(&mut criterion); + conv_f64_i128(&mut criterion); + + #[cfg(all(f128_enabled))] + // FIXME: ppc64le has a sporadic overflow panic in the crate functions + // + #[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] + { + conv_u32_f128(&mut criterion); + conv_u64_f128(&mut criterion); + conv_u128_f128(&mut criterion); + conv_i32_f128(&mut criterion); + conv_i64_f128(&mut criterion); + conv_i128_f128(&mut criterion); + conv_f128_u32(&mut criterion); + conv_f128_u64(&mut criterion); + conv_f128_u128(&mut criterion); + conv_f128_i32(&mut criterion); + conv_f128_i64(&mut criterion); + conv_f128_i128(&mut criterion); + } +} + +criterion_main!(float_conv); diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs index 01cc588c..a08748af 100644 --- a/testcrate/tests/conv.rs +++ b/testcrate/tests/conv.rs @@ -117,6 +117,28 @@ mod i_to_f { u128, __floatuntidf; i128, __floattidf; } + + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + i_to_f! 
{ f128, Quad, not(feature = "no-sys-f128-int-convert"), + u32, __floatunsitf; + i32, __floatsitf; + u64, __floatunditf; + i64, __floatditf; + u128, __floatuntitf; + i128, __floattitf; + } + + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"), + u32, __floatunsikf; + i32, __floatsikf; + u64, __floatundikf; + i64, __floatdikf; + u128, __floatuntikf; + i128, __floattikf; + } } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520