diff --git a/ci/run.sh b/ci/run.sh index 847b5243..2eafd1b4 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -24,6 +24,7 @@ else run="cargo test --manifest-path testcrate/Cargo.toml --no-fail-fast --target $target" $run $run --release + $run --benches $run --features c $run --features c --release $run --features no-asm diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index 6f771181..86bcb4b4 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -21,6 +21,10 @@ path = ".." default-features = false features = ["public-test-deps"] +[dev-dependencies] +criterion = { version = "0.5.1", default-features = false } +paste = "1.0.15" + [target.'cfg(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")), target_os = "linux"))'.dev-dependencies] test = { git = "https://github.com/japaric/utest" } utest-cortex-m-qemu = { default-features = false, git = "https://github.com/japaric/utest" } @@ -35,3 +39,35 @@ mem = ["compiler_builtins/mem"] mangled-names = ["compiler_builtins/mangled-names"] # Skip tests that rely on f128 symbols being available on the system no-sys-f128 = [] + +[[bench]] +name = "float_add" +harness = false + +[[bench]] +name = "float_sub" +harness = false + +[[bench]] +name = "float_mul" +harness = false + +[[bench]] +name = "float_div" +harness = false + +[[bench]] +name = "float_cmp" +harness = false + +[[bench]] +name = "float_conv" +harness = false + +[[bench]] +name = "float_extend" +harness = false + +[[bench]] +name = "float_trunc" +harness = false diff --git a/testcrate/benches/float_add.rs b/testcrate/benches/float_add.rs new file mode 100644 index 00000000..3eec169c --- /dev/null +++ b/testcrate/benches/float_add.rs @@ -0,0 +1,59 @@ +#![feature(f128)] + +use compiler_builtins::float::add; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! 
{ + name: add_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: add::__addsf3, + sys_fn: __addsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] + asm!( + "addss xmm0, xmm1", + "ret", + ); + + #[cfg(target_arch = "aarch64")] + asm!( + "fadd s0, s0, s1", + "ret", + ); + ], +} + +float_bench! { + name: add_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: add::__adddf3, + sys_fn: __adddf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] + asm!( + "addsd xmm0, xmm1", + "ret", + ); + + #[cfg(target_arch = "aarch64")] + asm!( + "fadd d0, d0, d1", + "ret", + ); + ], +} + +float_bench! { + name: add_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: add::__addtf3, + sys_fn: __addtf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +criterion_group!(float_add, add_f32, add_f64, add_f128); +criterion_main!(float_add); diff --git a/testcrate/benches/float_cmp.rs b/testcrate/benches/float_cmp.rs new file mode 100644 index 00000000..5117b621 --- /dev/null +++ b/testcrate/benches/float_cmp.rs @@ -0,0 +1,131 @@ +#![feature(f128)] + +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +use compiler_builtins::float::cmp; + +float_bench! { + name: cmp_f32_gt, + sig: (a: f32, b: f32) -> i32, + crate_fn: cmp::__gtsf2, + sys_fn: __gtsf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] + asm!( + "xor eax, eax", + "ucomiss xmm0, xmm1", + "seta al", + "ret", + ); + + #[cfg(target_arch = "aarch64")] + asm!( + "fcmp s0, s1", + "cset w0, gt", + "ret", + ); + ], +} + +float_bench! { + name: cmp_f32_unord, + sig: (a: f32, b: f32) -> i32, + crate_fn: cmp::__unordsf2, + sys_fn: __unordsf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] + asm!( + "cmpneqss xmm0, xmm1", + "movd eax, xmm0", + "and eax, 1", + "ret", + ); + + #[cfg(target_arch = "aarch64")] + asm!( + "fcmp s0, s1", + "cset w0, eq", + "ret", + ); + ], +} + +float_bench! 
{ + name: cmp_f64_gt, + sig: (a: f64, b: f64) -> i32, + crate_fn: cmp::__gtdf2, + sys_fn: __gtdf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] + asm!( + "xor eax, eax", + "ucomisd xmm0, xmm1", + "seta al", + "ret", + ); + + #[cfg(target_arch = "aarch64")] + asm!( + "fcmp d0, d1", + "cset w0, gt", + "ret", + ); + ], +} + +float_bench! { + name: cmp_f64_unord, + sig: (a: f64, b: f64) -> i32, + crate_fn: cmp::__unorddf2, + sys_fn: __unorddf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] + asm!( + "cmpeqsd xmm0, xmm1", + "movq rax, xmm0", + "and eax, 1", + "ret", + ); + + #[cfg(target_arch = "aarch64")] + asm!( + "fcmp d0, d1", + "cset w0, eq", + "ret", + ); + ], +} + +float_bench! { + name: cmp_f128_gt, + sig: (a: f128, b: f128) -> i32, + crate_fn: cmp::__gttf2, + sys_fn: __gttf2, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +float_bench! { + name: cmp_f128_unord, + sig: (a: f128, b: f128) -> i32, + crate_fn: cmp::__unordtf2, + sys_fn: __unordtf2, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +criterion_group!( + float_cmp, + cmp_f32_gt, + cmp_f32_unord, + cmp_f64_gt, + cmp_f64_unord, + cmp_f128_gt, + cmp_f128_unord +); +criterion_main!(float_cmp); diff --git a/testcrate/benches/float_conv.rs b/testcrate/benches/float_conv.rs new file mode 100644 index 00000000..599816b0 --- /dev/null +++ b/testcrate/benches/float_conv.rs @@ -0,0 +1,406 @@ +#![feature(f128)] +#![allow(improper_ctypes)] + +use compiler_builtins::float::conv; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +/* unsigned int -> float */ + +float_bench! 
{ + name: conv_u32_f32, + sig: (i: u32) -> f32, + crate_fn: conv::__floatunsisf, + sys_fn: __floatunsisf, + sys_available: all(), + asm: [ + #[cfg(all(target_arch = "x86_64", not(target_family = "windows")))] + asm!( + "mov eax, edi", + "cvtsi2ss xmm0, rax", + "ret", + ); + + #[cfg(all(target_arch = "x86_64", target_family = "windows"))] + asm!( + "mov eax, ecx", + "cvtsi2ss xmm0, rax", + "ret", + ); + + #[cfg(target_arch = "aarch64")] + asm!( + "ucvtf s0, w0", + "ret", + ); + ], +} + +float_bench! { + name: conv_u32_f64, + sig: (i: u32) -> f64, + crate_fn: conv::__floatunsidf, + sys_fn: __floatunsidf, + sys_available: all(), + asm: [ + #[cfg(all(target_arch = "x86_64", not(target_family = "windows")))] + asm!( + "mov eax, edi", + "cvtsi2sd xmm0, rax", + "ret", + ); + + #[cfg(all(target_arch = "x86_64", target_family = "windows"))] + asm!( + "mov eax, ecx", + "cvtsi2sd xmm0, rax", + "ret", + ); + + #[cfg(target_arch = "aarch64")] + asm!( + "ucvtf d0, w0", + "ret", + ); + ], +} + +float_bench! { + name: conv_u64_f32, + sig: (i: u64) -> f32, + crate_fn: conv::__floatundisf, + sys_fn: __floatundisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] + asm!( + "ucvtf s0, x0", + "ret", + ); + ], +} + +float_bench! { + name: conv_u64_f64, + sig: (i: u64) -> f64, + crate_fn: conv::__floatundidf, + sys_fn: __floatundidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] + asm!( + "ucvtf d0, x0", + "ret", + ); + ], +} + +float_bench! { + name: conv_u128_f32, + sig: (i: u128) -> f32, + crate_fn: conv::__floatuntisf, + sys_fn: __floatuntisf, + sys_available: all(), + asm: [] +} + +float_bench! { + name: conv_u128_f64, + sig: (i: u128) -> f64, + crate_fn: conv::__floatuntidf, + sys_fn: __floatuntidf, + sys_available: all(), + asm: [] +} + +/* signed int -> float */ + +float_bench! 
{ + name: conv_i32_f32, + sig: (i: i32) -> f32, + crate_fn: conv::__floatsisf, + sys_fn: __floatsisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] + asm!( + "cvtsi2ss xmm0, edi", + "ret", + ); + + #[cfg(target_arch = "aarch64")] + asm!( + "scvtf s0, w0", + "ret", + + ); + ], +} + +float_bench! { + name: conv_i32_f64, + sig: (i: i32) -> f64, + crate_fn: conv::__floatsidf, + sys_fn: __floatsidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] + asm!( + "cvtsi2sd xmm0, edi", + "ret", + ); + + #[cfg(target_arch = "aarch64")] + asm!( + "scvtf d0, w0", + "ret", + ); + ], +} + +float_bench! { + name: conv_i64_f32, + sig: (i: i64) -> f32, + crate_fn: conv::__floatdisf, + sys_fn: __floatdisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] + asm!( + "cvtsi2ss xmm0, rdi", + "ret", + ); + + #[cfg(target_arch = "aarch64")] + asm!( + "scvtf s0, x0", + "ret", + ); + ], +} + +float_bench! { + name: conv_i64_f64, + sig: (i: i64) -> f64, + crate_fn: conv::__floatdidf, + sys_fn: __floatdidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] + asm!( + "cvtsi2sd xmm0, rdi", + "ret", + ); + + #[cfg(target_arch = "aarch64")] + asm!( + "scvtf d0, x0", + "ret", + ); + ], +} + +float_bench! { + name: conv_i128_f32, + sig: (i: i128) -> f32, + crate_fn: conv::__floattisf, + sys_fn: __floattisf, + sys_available: all(), + asm: [] +} + +float_bench! { + name: conv_i128_f64, + sig: (i: i128) -> f64, + crate_fn: conv::__floattidf, + sys_fn: __floattidf, + sys_available: all(), + asm: [] +} + +/* float -> unsigned int */ + +float_bench! { + name: conv_f32_u32, + sig: (f: f32) -> u32, + crate_fn: conv::__fixunssfsi, + sys_fn: __fixunssfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] + asm!( + "fcvtzu w0, s0", + "ret", + ); + ], +} +float_bench! 
{ + name: conv_f32_u64, + sig: (f: f32) -> u64, + crate_fn: conv::__fixunssfdi, + sys_fn: __fixunssfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] + asm!( + "fcvtzu x0, s0", + "ret", + ); + ], +} + +float_bench! { + name: conv_f32_u128, + sig: (f: f32) -> u128, + crate_fn: conv::__fixunssfti, + sys_fn: __fixunssfti, + sys_available: all(), + asm: [] +} + +float_bench! { + name: conv_f64_u32, + sig: (f: f64) -> u32, + crate_fn: conv::__fixunsdfsi, + sys_fn: __fixunsdfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] + asm!( + "fcvtzu w0, d0", + "ret", + ); + ], +} + +float_bench! { + name: conv_f64_u64, + sig: (f: f64) -> u64, + crate_fn: conv::__fixunsdfdi, + sys_fn: __fixunsdfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] + asm!( + "fcvtzu x0, d0", + "ret", + ); + ], +} + +float_bench! { + name: conv_f64_u128, + sig: (f: f64) -> u128, + crate_fn: conv::__fixunsdfti, + sys_fn: __fixunsdfti, + sys_available: all(), + asm: [] +} + +/* float -> signed int */ + +float_bench! { + name: conv_f32_i32, + sig: (f: f32) -> i32, + crate_fn: conv::__fixsfsi, + sys_fn: __fixsfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] + asm!( + "fcvtzs w0, s0", + "ret", + ); + ], +} +float_bench! { + name: conv_f32_i64, + sig: (f: f32) -> i64, + crate_fn: conv::__fixsfdi, + sys_fn: __fixsfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] + asm!( + "fcvtzs x0, s0", + "ret", + ); + ], +} + +float_bench! { + name: conv_f32_i128, + sig: (f: f32) -> i128, + crate_fn: conv::__fixsfti, + sys_fn: __fixsfti, + sys_available: all(), + asm: [] +} + +float_bench! { + name: conv_f64_i32, + sig: (f: f64) -> i32, + crate_fn: conv::__fixdfsi, + sys_fn: __fixdfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] + asm!( + "fcvtzs w0, d0", + "ret", + ); + ], +} + +float_bench! 
{ + name: conv_f64_i64, + sig: (f: f64) -> i64, + crate_fn: conv::__fixdfdi, + sys_fn: __fixdfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] + asm!( + "fcvtzs x0, d0", + "ret", + ); + ], +} + +float_bench! { + name: conv_f64_i128, + sig: (f: f64) -> i128, + crate_fn: conv::__fixdfti, + sys_fn: __fixdfti, + sys_available: all(), + asm: [] +} + +criterion_group!( + float_conv, + conv_u32_f32, + conv_u32_f64, + conv_u64_f32, + conv_u64_f64, + conv_u128_f32, + conv_u128_f64, + conv_i32_f32, + conv_i32_f64, + conv_i64_f32, + conv_i64_f64, + conv_i128_f32, + conv_i128_f64, + conv_f32_u32, + conv_f32_u64, + conv_f32_u128, + conv_f32_i32, + conv_f32_i64, + conv_f32_i128, + conv_f64_u32, + conv_f64_u64, + conv_f64_u128, + conv_f64_i32, + conv_f64_i64, + conv_f64_i128, +); +criterion_main!(float_conv); diff --git a/testcrate/benches/float_div.rs b/testcrate/benches/float_div.rs new file mode 100644 index 00000000..79a4514f --- /dev/null +++ b/testcrate/benches/float_div.rs @@ -0,0 +1,50 @@ +#![feature(f128)] + +use compiler_builtins::float::div; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! { + name: div_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: div::__divsf3, + sys_fn: __divsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] + asm!( + "divss xmm0, xmm1", + "ret", + ); + + #[cfg(target_arch = "aarch64")] + asm!( + "fdiv s0, s0, s1", + "ret", + ); + ], +} + +float_bench! 
{ + name: div_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: div::__divdf3, + sys_fn: __divdf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] + asm!( + "divsd xmm0, xmm1", + "ret", + ); + + #[cfg(target_arch = "aarch64")] + asm!( + "fdiv d0, d0, d1", + "ret", + ); + ], +} + +criterion_group!(float_div, div_f32, div_f64); +criterion_main!(float_div); diff --git a/testcrate/benches/float_extend.rs b/testcrate/benches/float_extend.rs new file mode 100644 index 00000000..6985d242 --- /dev/null +++ b/testcrate/benches/float_extend.rs @@ -0,0 +1,73 @@ +#![feature(f128)] +#![feature(f16)] + +use compiler_builtins::float::extend; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! { + name: extend_f16_f32, + sig: (f: f16) -> f32, + crate_fn: extend::__extendhfsf2, + sys_fn: __extendhfsf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] + asm!( + "fcvt s0, h0", + "ret", + ); + ], +} + +float_bench! { + name: extend_f16_f128, + sig: (f: f16) -> f128, + crate_fn: extend::__extendhftf2, + sys_fn: __extendhftf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +float_bench! { + name: extend_f32_f64, + sig: (f: f32) -> f64, + crate_fn: extend::__extendsfdf2, + sys_fn: __extendsfdf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] + asm!( + "fcvt d0, s0", + "ret", + ); + ], +} + +float_bench! { + name: extend_f32_f128, + sig: (f: f32) -> f128, + crate_fn: extend::__extendsftf2, + sys_fn: __extendsftf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +float_bench! 
{ + name: extend_f64_f128, + sig: (f: f64) -> f128, + crate_fn: extend::__extenddftf2, + sys_fn: __extenddftf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +criterion_group!( + float_extend, + extend_f16_f32, + extend_f16_f128, + extend_f32_f64, + extend_f32_f128, + extend_f64_f128, +); +criterion_main!(float_extend); diff --git a/testcrate/benches/float_mul.rs b/testcrate/benches/float_mul.rs new file mode 100644 index 00000000..daaeb20c --- /dev/null +++ b/testcrate/benches/float_mul.rs @@ -0,0 +1,59 @@ +#![feature(f128)] + +use compiler_builtins::float::mul; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! { + name: mul_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: mul::__mulsf3, + sys_fn: __mulsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] + asm!( + "mulss xmm0, xmm1", + "ret", + ); + + #[cfg(target_arch = "aarch64")] + asm!( + "fmul s0, s0, s1", + "ret", + ); + ], +} + +float_bench! { + name: mul_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: mul::__muldf3, + sys_fn: __muldf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] + asm!( + "mulsd xmm0, xmm1", + "ret", + ); + + #[cfg(target_arch = "aarch64")] + asm!( + "fmul d0, d0, d1", + "ret", + ); + ], +} + +float_bench! { + name: mul_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: mul::__multf3, + sys_fn: __multf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +criterion_group!(float_mul, mul_f32, mul_f64, mul_f128); +criterion_main!(float_mul); diff --git a/testcrate/benches/float_sub.rs b/testcrate/benches/float_sub.rs new file mode 100644 index 00000000..19b20a26 --- /dev/null +++ b/testcrate/benches/float_sub.rs @@ -0,0 +1,59 @@ +#![feature(f128)] + +use compiler_builtins::float::sub; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! 
{ + name: sub_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: sub::__subsf3, + sys_fn: __subsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] + asm!( + "subss xmm0, xmm1", + "ret", + ); + + #[cfg(target_arch = "aarch64")] + asm!( + "fsub s0, s0, s1", + "ret", + ); + ], +} + +float_bench! { + name: sub_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: sub::__subdf3, + sys_fn: __subdf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] + asm!( + "subsd xmm0, xmm1", + "ret", + ); + + #[cfg(target_arch = "aarch64")] + asm!( + "fsub d0, d0, d1", + "ret", + ); + ], +} + +float_bench! { + name: sub_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: sub::__subtf3, + sys_fn: __subtf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +criterion_group!(float_sub, sub_f32, sub_f64, sub_f128); +criterion_main!(float_sub); diff --git a/testcrate/benches/float_trunc.rs b/testcrate/benches/float_trunc.rs new file mode 100644 index 00000000..860e6d84 --- /dev/null +++ b/testcrate/benches/float_trunc.rs @@ -0,0 +1,95 @@ +#![feature(f128)] +#![feature(f16)] + +use compiler_builtins::float::trunc; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! { + name: trunc_f32_f16, + sig: (f: f32) -> f16, + crate_fn: trunc::__truncsfhf2, + sys_fn: __truncsfhf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] + asm!( + "fcvt h0, s0", + "ret", + ); + ], +} + +float_bench! { + name: trunc_f64_f16, + sig: (f: f64) -> f16, + crate_fn: trunc::__truncdfhf2, + sys_fn: __truncdfhf2, + sys_available: not(feature = "no-sys-f128"), + asm: [ + #[cfg(target_arch = "aarch64")] + asm!( + "fcvt h0, d0", + "ret", + ); + ], +} + +float_bench! 
{ + name: trunc_f64_f32, + sig: (f: f64) -> f32, + crate_fn: trunc::__truncdfsf2, + sys_fn: __truncdfsf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] + asm!( + "cvtsd2ss xmm0, xmm0", + "ret", + ); + + #[cfg(target_arch = "aarch64")] + asm!( + "fcvt s0, d0", + "ret", + ); + ], +} + +float_bench! { + name: trunc_f128_f16, + sig: (f: f128) -> f16, + crate_fn: trunc::__trunctfhf2, + sys_fn: __trunctfhf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +float_bench! { + name: trunc_f128_f32, + sig: (f: f128) -> f32, + crate_fn: trunc::__trunctfsf2, + sys_fn: __trunctfsf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +float_bench! { + name: trunc_f128_f64, + sig: (f: f128) -> f64, + crate_fn: trunc::__trunctfdf2, + sys_fn: __trunctfdf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +criterion_group!( + float_trunc, + trunc_f32_f16, + trunc_f64_f16, + trunc_f64_f32, + trunc_f128_f16, + trunc_f128_f32, + trunc_f128_f64, +); +criterion_main!(float_trunc); diff --git a/testcrate/src/bench.rs b/testcrate/src/bench.rs new file mode 100644 index 00000000..fb7e5266 --- /dev/null +++ b/testcrate/src/bench.rs @@ -0,0 +1,268 @@ +use core::cell::RefCell; + +use alloc::vec::Vec; +use compiler_builtins::float::Float; + +/// Fuzz with this many items to check that the functions agree +pub const CHECK_ITER_ITEMS: u32 = 10_000; +/// Benchmark with this many items to get a variety +pub const BENCH_ITER_ITEMS: u32 = 500; + +/// Still run benchmarks but don't check correctness between compiler-builtins and +/// system functions +pub const SKIP_SYS_CHECKS: &[&str] = &[ + // FIXME: some sort of precision error (tested on aarch64) + "extend_f16_f32", + "trunc_f32_f16", + // We return -1, system functions on x86 return -2 + "cmp_f128_gt", + // FIXME: rounding error + // + "mul_f128", + // System symbols do the wrong thing + // + "trunc_f64_f16", +]; + +/// Create a comparison of the system symbol, compiler_builtins, and optionally 
handwritten +/// assembly. +/// +/// `asm!` gets turned into global assembly, more or less a naked function. +#[macro_export] +macro_rules! float_bench { + ( + // Name of this benchmark + name: $name:ident, + // The function signature to be tested + sig: ($($arg:ident: $arg_ty:ty),*) -> $ret_ty:ty, + // Path to the function in compiler_builtins + crate_fn: $crate_fn:path, + // Name of the system symbol + sys_fn: $sys_fn:ident, + // Meta saying whether the system symbol is available + sys_available: $sys_available:meta, + // Assembly implementations, if any. + asm: [ + $( + #[$asm_meta:meta] + asm!($($asm_tt:tt)*) + );* + $(;)? + ] + $(,)? + ) => {paste::paste! { + #[allow(dead_code)] + extern "C" { + /// Assembly function name + fn [<$name _asm>]($($arg: $arg_ty),*) -> $ret_ty; + + /// Binding for the system function + fn $sys_fn($($arg: $arg_ty),*) -> $ret_ty; + } + + $( + #[$asm_meta] + #[cfg(not(target_vendor = "apple"))] + core::arch::global_asm!( + concat!(".global ", stringify!([<$name _asm>])), + concat!(stringify!([<$name _asm>]), ":"), + $($asm_tt)* + ); + + #[$asm_meta] + #[cfg(target_vendor = "apple")] + core::arch::global_asm!( + // mac targets have a leading `_` in assembly symbol names + concat!(".global _", stringify!([<$name _asm>])), + concat!("_", stringify!([<$name _asm>]), ":"), + $($asm_tt)* + ); + )* + + fn $name(c: &mut Criterion) { + use core::hint::black_box; + use compiler_builtins::float::Float; + use $crate::bench::BenchType; + + #[inline(never)] // equalize with external calls + fn crate_fn($($arg: $arg_ty),*) -> $ret_ty { + $crate_fn( $($arg),* ) + } + + #[inline(always)] // already a branch + #[cfg($sys_available)] + fn sys_fn($($arg: $arg_ty),*) -> $ret_ty { + unsafe { $sys_fn( $($arg),* ) } + } + + #[inline(never)] // equalize with external calls + fn asm_fn($($arg: $arg_ty),*) -> $ret_ty { + unsafe { [<$name _asm>]( $($arg),* ) } + } + + let testvec = <($($arg_ty),*)>::make_testvec($crate::bench::CHECK_ITER_ITEMS); + let benchvec= 
<($($arg_ty),*)>::make_testvec($crate::bench::BENCH_ITER_ITEMS); + let title = stringify!($name); + + // Verify math lines up + + #[cfg($sys_available)] + for ($($arg),*) in testvec.iter().copied() { + if $crate::bench::SKIP_SYS_CHECKS.contains(&title) { + continue; + } + + let crate_res = crate_fn($($arg),*); + let sys_res = sys_fn($($arg),*); + assert!( + $ret_ty::check_eq(crate_res, sys_res), + "{title}{:?}: crate: {crate_res:?}, sys: {sys_res:?}", + ($($arg),* ,) + ); + } + + // use a binding to get around nested macro repetition + let do_asm_check = || { + for ($($arg),*) in testvec.iter().copied() { + // FIXME: these fail for float multiplication + // + if title.contains("mul") + // cmp is skipped because builtins do spaceship but assembly does + // a single operation. + || title.contains("cmp") { + continue; + } + + let crate_res = crate_fn($($arg),*); + let asm_res = asm_fn($($arg),*); + + assert!( + $ret_ty::check_eq(crate_res, asm_res), + "{title}{:?}: crate: {crate_res:?}, asm: {asm_res:?}", + ($($arg),* ,) + ); + } + }; + $( + #[$asm_meta] + do_asm_check(); + )* + + c.bench_function(&format!("{title} compiler-builtins"), |b| { + b.iter(|| { + for ($($arg),*) in benchvec.iter().copied() { + black_box(crate_fn( $(black_box($arg)),* )); + } + }) + }); + + #[cfg($sys_available)] + c.bench_function(&format!("{title} system"), |b| { + b.iter(|| { + for ($($arg),*) in benchvec.iter().copied() { + black_box(sys_fn( $(black_box($arg)),* )); + } + }) + }); + + // use a binding to get around nested macro repetition + let mut do_asm_bench = || { + c.bench_function(&format!( + "{title} assembly {} {}", std::env::consts::ARCH, std::env::consts::FAMILY + ), |b| { + b.iter(|| { + for ($($arg),*) in benchvec.iter().copied() { + black_box(asm_fn( $(black_box($arg)),* )); + } + }) + }); + }; + $( + #[$asm_meta] + do_asm_bench(); + )* + } + }}; + + (@coalesce $a:ty, $b:ty) => { $a }; + (@coalesce , $b:ty) => { $b }; + + // Default to float comparison + (@eq $f_ty:ty,) => { 
+ <$f_ty as Float>::eq_repr + }; + // Use normal eq if the return type is not a float + (@eq $f_ty:ty, $ret_ty:ty) => { + |a: $ret_ty, b: $ret_ty| a == b + }; + +} + +/// A type used as either an input or output to/from a benchmark function. +pub trait BenchType: Sized { + fn make_testvec(len: u32) -> Vec<Self>; + fn check_eq(a: Self, b: Self) -> bool; +} + +macro_rules! impl_benchtype { + (float $($f_ty:ty),+) => {$( + impl BenchType for $f_ty { + fn make_testvec(len: u32) -> Vec<Self> { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz_float(len, |a| ret.borrow_mut().push(a)); + ret.into_inner() + } + + fn check_eq(a: Self, b: Self) -> bool { + Float::eq_repr(a, b) + } + } + + impl BenchType for ($f_ty, $f_ty) { + fn make_testvec(len: u32) -> Vec<Self> { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz_float_2(len, |a, b| ret.borrow_mut().push((a, b))); + ret.into_inner() + } + + fn check_eq(_a: Self, _b: Self) -> bool { + unimplemented!() + } + } + )*}; + (int $($i_ty:ty),+) => {$( + impl BenchType for $i_ty { + fn make_testvec(len: u32) -> Vec<Self> { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz(len, |a| ret.borrow_mut().push(a)); + ret.into_inner() + } + + fn check_eq(a: Self, b: Self) -> bool { + a == b + } + } + + impl BenchType for ($i_ty, $i_ty) { + fn make_testvec(len: u32) -> Vec<Self> { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz_2(len, |a, b| ret.borrow_mut().push((a, b))); + ret.into_inner() + } + + fn check_eq(_a: Self, _b: Self) -> bool { + unimplemented!() + } + } + )*}; +} + +#[cfg(not(feature = "no-f16-f128"))] +impl_benchtype!(float f16, f128); +impl_benchtype!(float f32, f64); +impl_benchtype!(int i16, i32, i64, i128); +impl_benchtype!(int u16, u32, u64, u128); diff --git a/testcrate/src/lib.rs b/testcrate/src/lib.rs index 1f3a4b82..66a684d3 100644 --- a/testcrate/src/lib.rs +++ 
b/testcrate/src/lib.rs @@ -13,6 +13,11 @@ //! Some floating point tests are disabled for specific architectures, because they do not have //! correct rounding. #![no_std] +#![cfg_attr(not(feature = "no-f16-f128"), feature(f128))] +#![cfg_attr(not(feature = "no-f16-f128"), feature(f16))] + +pub mod bench; +extern crate alloc; use compiler_builtins::float::Float; use compiler_builtins::int::{Int, MinInt};