Skip to content

Commit

Permalink
[FPEnv][AMDGPU] Correct strictfp tests.
Browse files Browse the repository at this point in the history
Correct AMDGPU strictfp tests to follow the rules documented in the
LangRef:
https://llvm.org/docs/LangRef.html#constrained-floating-point-intrinsics

These tests needed the strictfp attribute added to function calls and
some declarations.

Some of the tests now pass with D146845; others get farther along but
still fail with it. The tests revealed that further work is required,
mostly in AMDGPU atomics, to get the remaining tests passing.

Since I was here anyway, I removed the strictfp attribute from some
constrained intrinsic declarations; they have this attribute by default.

Test changes verified with D146845.
  • Loading branch information
kpneal committed Feb 5, 2024
1 parent 1af0536 commit d15c454
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 30 deletions.
9 changes: 6 additions & 3 deletions llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
Original file line number Diff line number Diff line change
Expand Up @@ -943,7 +943,7 @@ define <2 x half> @no_fold_v2f16_select_user_fsub_into_fneg_modifier_dynamic(i1
ret <2 x half> %mul
}

define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #0 {
define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #3 {
; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_ieee:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand All @@ -955,7 +955,7 @@ define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1)
ret float %mul
}

define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1) #1 {
define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1) #4 {
; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_daz:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand All @@ -967,7 +967,7 @@ define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1)
ret float %mul
}

define float @fold_f32_strict_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #2 {
define float @fold_f32_strict_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #5 {
; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_dynamic:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down Expand Up @@ -1316,3 +1316,6 @@ declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32)
attributes #0 = { "denormal-fp-math"="ieee,ieee" }
attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
attributes #2 = { "denormal-fp-math"="dynamic,dynamic" }
attributes #3 = { "denormal-fp-math"="ieee,ieee" strictfp }
attributes #4 = { "denormal-fp-math"="preserve-sign,preserve-sign" strictfp }
attributes #5 = { "denormal-fp-math"="dynamic,dynamic" strictfp }
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2276,7 +2276,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB3_2
; GFX1132-DPP-NEXT: .LBB3_3:
; GFX1132-DPP-NEXT: s_endpgm
%divValue = call float @div.float.value()
%divValue = call float @div.float.value() strictfp
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue syncscope("one-as") monotonic
ret void
}
Expand Down Expand Up @@ -4174,7 +4174,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1132-DPP-NEXT: global_atomic_add_f32 v4, v0, s[0:1]
; GFX1132-DPP-NEXT: .LBB6_2:
; GFX1132-DPP-NEXT: s_endpgm
%divValue = call float @div.float.value()
%divValue = call float @div.float.value() strictfp
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue syncscope("agent") monotonic
ret void
}
Expand Down Expand Up @@ -5403,7 +5403,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_defalut_scop
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB8_2
; GFX1132-DPP-NEXT: .LBB8_3:
; GFX1132-DPP-NEXT: s_endpgm
%divValue = call float @div.float.value()
%divValue = call float @div.float.value() strictfp
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue monotonic, align 4
ret void
}
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2380,7 +2380,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB3_2
; GFX1132-DPP-NEXT: .LBB3_3:
; GFX1132-DPP-NEXT: s_endpgm
%divValue = call float @div.float.value()
%divValue = call float @div.float.value() strictfp
%result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue syncscope("one-as") monotonic
ret void
}
Expand Down Expand Up @@ -4382,7 +4382,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX1132-DPP-NEXT: .LBB6_3:
; GFX1132-DPP-NEXT: s_endpgm
%divValue = call float @div.float.value()
%divValue = call float @div.float.value() strictfp
%result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue syncscope("agent") monotonic
ret void
}
Expand Down Expand Up @@ -5611,7 +5611,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_defalut_scop
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB8_2
; GFX1132-DPP-NEXT: .LBB8_3:
; GFX1132-DPP-NEXT: s_endpgm
%divValue = call float @div.float.value()
%divValue = call float @div.float.value() strictfp
%result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue monotonic, align 4
ret void
}
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ define i32 @strictfp_func_fpmode_i32() strictfp {
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fpmode = call i32 @llvm.get.fpmode.i32()
%fpmode = call i32 @llvm.get.fpmode.i32() strictfp
ret i32 %fpmode
}

Expand Down
23 changes: 11 additions & 12 deletions llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -374,24 +374,23 @@ define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs(float %
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: v_cvt_f16_f32_e64 v0, |v0|
; GFX1011-NEXT: s_setpc_b64 s[30:31]
%abs.arg = call float @llvm.fabs.f32(float %arg)
%abs.arg = call float @llvm.fabs.f32(float %arg) #0
%result = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %abs.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret void
}

declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) #1
declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata) #1
declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata) #1
declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata)
declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata)

declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) #1
declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata) #1
declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata) #1
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)

declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata) #1
declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata) #1
declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f64(<3 x double>, metadata, metadata) #1
declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)
declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata)
declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f64(<3 x double>, metadata, metadata)

declare float @llvm.fabs.f32(float) #1
declare float @llvm.fabs.f32(float)

attributes #0 = { strictfp }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s

declare void @f16_user(half)
declare half @f16_result()
declare void @f16_user(half) #0
declare half @f16_result() #0

declare void @v2f16_user(<2 x half>)
declare <2 x half> @v2f16_result()
declare void @v2f16_user(<2 x half>) #0
declare <2 x half> @v2f16_result() #0

declare void @v4f16_user(<4 x half>)
declare <4 x half> @v4f16_result()
declare void @v4f16_user(<4 x half>) #0
declare <4 x half> @v4f16_result() #0

declare void @v8f16_user(<8 x half>)
declare <8 x half> @v8f16_result()
declare void @v8f16_user(<8 x half>) #0
declare <8 x half> @v8f16_result() #0

define void @f16_arg(half %arg, ptr %ptr) #0 {
; GFX7-LABEL: f16_arg:
Expand Down

0 comments on commit d15c454

Please sign in to comment.