Skip to content

Commit

Permalink
[LoopVectorize][X86][AMDLibm] Add Missing AMD LibM trig vector intrin…
Browse files Browse the repository at this point in the history
  • Loading branch information
farzonl authored Aug 12, 2024
1 parent 80ff391 commit efc6b50
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 0 deletions.
6 changes: 6 additions & 0 deletions llvm/include/llvm/Analysis/VecFuncs.def
Original file line number Diff line number Diff line change
Expand Up @@ -1371,7 +1371,9 @@ TLI_DEFINE_VECFUNC("llvm.asin.f32", "amd_vrs16_asinf", FIXED(16), NOMASK, "_ZGV_

TLI_DEFINE_VECFUNC("acosf", "amd_vrs4_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("acosf", "amd_vrs8_acosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("acosf", "amd_vrs16_acosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")

TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs16_acosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs8_acosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs4_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")

Expand All @@ -1389,17 +1391,21 @@ TLI_DEFINE_VECFUNC("llvm.atan.f32", "amd_vrs4_atanf", FIXED(4), NOMASK, "_ZGV_LL
TLI_DEFINE_VECFUNC("llvm.atan.f32", "amd_vrs8_atanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("llvm.atan.f32", "amd_vrs16_atanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")

TLI_DEFINE_VECFUNC("cosh", "amd_vrd2_cosh" , FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("coshf", "amd_vrs4_coshf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("coshf", "amd_vrs8_coshf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")

TLI_DEFINE_VECFUNC("llvm.cosh.f64", "amd_vrd2_cosh" , FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("llvm.cosh.f32", "amd_vrs4_coshf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.cosh.f32", "amd_vrs8_coshf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")

TLI_DEFINE_VECFUNC("tanhf", "amd_vrs4_tanhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("tanhf", "amd_vrs8_tanhf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("tanhf", "amd_vrs16_tanhf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")

TLI_DEFINE_VECFUNC("llvm.tanh.f32", "amd_vrs4_tanhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.tanh.f32", "amd_vrs8_tanhf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("llvm.tanh.f32", "amd_vrs16_tanhf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")

TLI_DEFINE_VECFUNC("cbrt", "amd_vrd2_cbrt", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("cbrtf", "amd_vrs4_cbrtf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
Expand Down
62 changes: 62 additions & 0 deletions llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,10 @@ define void @acos_f32(ptr nocapture %varray) {
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @acos_f32(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
br label %for.body

Expand All @@ -435,6 +439,10 @@ define void @acos_f32_intrinsic(ptr nocapture %varray) {
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @acos_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
br label %for.body

Expand Down Expand Up @@ -669,6 +677,29 @@ for.end:
ret void
}

define void @cosh_f64(ptr nocapture %varray) {
; CHECK-AVX-VF2-LABEL: @cosh_f64(
; CHECK-AVX-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
; CHECK-AVX-VF2: ret void
;
entry:
br label %for.body

for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%tmp = trunc i64 %iv to i32
%conv = sitofp i32 %tmp to double
%call = tail call double @cosh(double %conv)
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
store double %call, ptr %arrayidx, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, 1000
br i1 %exitcond, label %for.end, label %for.body

for.end:
ret void
}

define void @cosh_f32(ptr nocapture %varray) {
; CHECK-LABEL: @cosh_f32(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
Expand All @@ -692,6 +723,29 @@ for.end:
ret void
}

define void @cosh_f64_intrinsic(ptr nocapture %varray) {
; CHECK-AVX-VF2-LABEL: @cosh_f64_intrinsic(
; CHECK-AVX-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
; CHECK-AVX-VF2: ret void
;
entry:
br label %for.body

for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%tmp = trunc i64 %iv to i32
%conv = sitofp i32 %tmp to double
%call = tail call double @llvm.cosh.f64(double %conv)
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
store double %call, ptr %arrayidx, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, 1000
br i1 %exitcond, label %for.end, label %for.body

for.end:
ret void
}

define void @cosh_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @cosh_f32_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
Expand Down Expand Up @@ -720,6 +774,10 @@ define void @tanh_f32(ptr nocapture %varray) {
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @tanh_f32(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
br label %for.body

Expand All @@ -743,6 +801,10 @@ define void @tanh_f32_intrinsic(ptr nocapture %varray) {
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
; CHECK-AVX512-VF16-LABEL: @tanh_f32_intrinsic(
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF16: ret void
;
entry:
br label %for.body

Expand Down

0 comments on commit efc6b50

Please sign in to comment.