From d15c454bedc05775b5080e1d2130b0554d5e5a81 Mon Sep 17 00:00:00 2001
From: "Kevin P. Neal"
Date: Mon, 5 Feb 2024 09:19:34 -0500
Subject: [PATCH] [FPEnv][AMDGPU] Correct strictfp tests.

Correct AMDGPU strictfp tests to follow the rules documented in the LangRef:
https://llvm.org/docs/LangRef.html#constrained-floating-point-intrinsics

These tests needed the strictfp attribute added to function calls and some
declarations. Some of the tests now pass with D146845; others get farther
along before failing with it. The tests revealed that further work, mostly
in the AMDGPU atomics handling, is required to get them all passing.

Since I was here anyway, I removed the strictfp attribute from some
constrained intrinsic declarations. They have this attribute by default.

Test changes verified with D146845. A minimal IR sketch of the call-site
rule being followed here appears after the diff.
---
 .../AMDGPU/fsub-as-fneg-src-modifier.ll       |  9 +++++---
 .../AMDGPU/global_atomics_scan_fadd.ll        |  6 ++---
 .../AMDGPU/global_atomics_scan_fsub.ll        |  6 ++---
 llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll   |  2 +-
 llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll    | 23 +++++++++----------
 .../AMDGPU/strictfp_f16_abi_promote.ll        | 16 ++++++-------
 6 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll b/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
index 9896373b08d1fa..85286841cbcac9 100644
--- a/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
@@ -943,7 +943,7 @@ define <2 x half> @no_fold_v2f16_select_user_fsub_into_fneg_modifier_dynamic(i1
   ret <2 x half> %mul
 }

-define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #0 {
+define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #3 {
 ; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_ieee:
 ; CHECK: ; %bb.0:
 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -955,7 +955,7 @@ define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1)
   ret float %mul
 }

-define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1) #1 {
+define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1) #4 {
 ; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_daz:
 ; CHECK: ; %bb.0:
 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -967,7 +967,7 @@ define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1)
   ret float %mul
 }

-define float @fold_f32_strict_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #2 {
+define float @fold_f32_strict_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #5 {
 ; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_dynamic:
 ; CHECK: ; %bb.0:
 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1316,3 +1316,6 @@ declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32)
 attributes #0 = { "denormal-fp-math"="ieee,ieee" }
 attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
 attributes #2 = { "denormal-fp-math"="dynamic,dynamic" }
+attributes #3 = { "denormal-fp-math"="ieee,ieee" strictfp }
+attributes #4 = { "denormal-fp-math"="preserve-sign,preserve-sign" strictfp }
+attributes #5 = { "denormal-fp-math"="dynamic,dynamic" strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
index df254a059d9f37..6eec8d5356ca80 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
@@ -2276,7 +2276,7 @@ define amdgpu_kernel void
@global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB3_2 ; GFX1132-DPP-NEXT: .LBB3_3: ; GFX1132-DPP-NEXT: s_endpgm - %divValue = call float @div.float.value() + %divValue = call float @div.float.value() strictfp %result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue syncscope("one-as") monotonic ret void } @@ -4174,7 +4174,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1132-DPP-NEXT: global_atomic_add_f32 v4, v0, s[0:1] ; GFX1132-DPP-NEXT: .LBB6_2: ; GFX1132-DPP-NEXT: s_endpgm - %divValue = call float @div.float.value() + %divValue = call float @div.float.value() strictfp %result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue syncscope("agent") monotonic ret void } @@ -5403,7 +5403,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_defalut_scop ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB8_2 ; GFX1132-DPP-NEXT: .LBB8_3: ; GFX1132-DPP-NEXT: s_endpgm - %divValue = call float @div.float.value() + %divValue = call float @div.float.value() strictfp %result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue monotonic, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll index 5f4d0302ab38e0..c927a0e1ef06c1 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll @@ -2380,7 +2380,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB3_2 ; GFX1132-DPP-NEXT: .LBB3_3: ; GFX1132-DPP-NEXT: s_endpgm - %divValue = call float @div.float.value() + %divValue = call float @div.float.value() strictfp %result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue syncscope("one-as") monotonic ret void } @@ -4382,7 +4382,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1132-DPP-NEXT: .LBB6_3: ; GFX1132-DPP-NEXT: s_endpgm - %divValue = call float @div.float.value() + %divValue = call float @div.float.value() strictfp %result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue syncscope("agent") monotonic ret void } @@ -5611,7 +5611,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_defalut_scop ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB8_2 ; GFX1132-DPP-NEXT: .LBB8_3: ; GFX1132-DPP-NEXT: s_endpgm - %divValue = call float @div.float.value() + %divValue = call float @div.float.value() strictfp %result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue monotonic, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll b/llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll index 6c920e84bcdb18..aca7d3c720ceb5 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll @@ -76,7 +76,7 @@ define i32 @strictfp_func_fpmode_i32() strictfp { ; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0 ; GFX11-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %fpmode = call i32 @llvm.get.fpmode.i32() + %fpmode = call i32 @llvm.get.fpmode.i32() strictfp ret i32 %fpmode } diff --git a/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll b/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll index 965040d0d879c8..0f80327638a9cb 100644 --- a/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll @@ -374,24 +374,23 @@ define void 
@v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs(float % ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: v_cvt_f16_f32_e64 v0, |v0| ; GFX1011-NEXT: s_setpc_b64 s[30:31] - %abs.arg = call float @llvm.fabs.f32(float %arg) + %abs.arg = call float @llvm.fabs.f32(float %arg) #0 %result = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %abs.arg, metadata !"round.tonearest", metadata !"fpexcept.strict") ret void } -declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) #1 -declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata) #1 -declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata) #1 +declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) +declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata) +declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata) -declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) #1 -declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata) #1 -declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata) #1 +declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) +declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata) +declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata) -declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata) #1 -declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata) #1 -declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f64(<3 x double>, metadata, metadata) #1 +declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata) +declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata) +declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f64(<3 x double>, metadata, metadata) -declare float @llvm.fabs.f32(float) #1 +declare float @llvm.fabs.f32(float) attributes #0 = { strictfp } -attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } diff --git a/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll b/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll index 468a8463a06d6f..3bf7fec81c0413 100644 --- a/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll +++ b/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll @@ -1,17 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s -declare void @f16_user(half) -declare half @f16_result() +declare void @f16_user(half) #0 +declare half @f16_result() #0 -declare void @v2f16_user(<2 x half>) -declare <2 x half> @v2f16_result() +declare void @v2f16_user(<2 x half>) #0 +declare <2 x half> @v2f16_result() #0 -declare void @v4f16_user(<4 x half>) -declare <4 x half> @v4f16_result() +declare void @v4f16_user(<4 x half>) #0 +declare <4 x half> @v4f16_result() #0 -declare void @v8f16_user(<8 x half>) -declare <8 x half> @v8f16_result() +declare void @v8f16_user(<8 x half>) #0 +declare <8 x half> 
@v8f16_result() #0 define void @f16_arg(half %arg, ptr %ptr) #0 { ; GFX7-LABEL: f16_arg:
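
For reference, and not part of the patch itself, the following is a minimal
LLVM IR sketch of the LangRef rule the tests are being corrected to follow:
inside a function marked strictfp, floating-point operations go through the
constrained intrinsics and every call site carries the strictfp attribute.
The names @scale_input and @get_scale are hypothetical and exist only for
illustration.

; Minimal sketch with hypothetical names; not taken from the tests above.
define float @scale_input(float %x) strictfp {
  ; A plain call inside a strictfp function needs the call-site attribute,
  ; just like the @div.float.value() and @llvm.get.fpmode.i32() calls above.
  %s = call float @get_scale() strictfp
  ; FP arithmetic uses the constrained intrinsic, also marked strictfp.
  %r = call float @llvm.experimental.constrained.fmul.f32(float %x, float %s,
                    metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
  ret float %r
}

declare float @get_scale()

; No explicit strictfp is needed on a constrained-intrinsic declaration; it
; has the attribute by default, which is why the patch drops it from the
; constrained fptrunc declarations in strict_fptrunc.ll.
declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)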