From ea72bbb18954fabca6df74b9b3c724c1e7089f03 Mon Sep 17 00:00:00 2001
From: Tim Gymnich
Date: Fri, 2 Aug 2024 21:40:24 +0200
Subject: [PATCH] [DXIL] Add sign intrinsic part 2

---
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |   1 +
 .../Target/DirectX/DXILIntrinsicExpansion.cpp |  38 +++
 llvm/test/CodeGen/DirectX/sign.ll             | 216 ++++++++++++++++++
 3 files changed, 255 insertions(+)
 create mode 100644 llvm/test/CodeGen/DirectX/sign.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 32af50b25f3904..715b82a79c08cd 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -79,4 +79,5 @@ def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLV
 def int_dx_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>;
 def int_dx_rcp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
 def int_dx_rsqrt : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty]>;
 }
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 2daa4f825c3b25..1e57d025da3b8c 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -16,6 +16,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
@@ -48,6 +49,7 @@ static bool isIntrinsicExpansion(Function &F) {
   case Intrinsic::dx_fdot:
   case Intrinsic::dx_sdot:
   case Intrinsic::dx_udot:
+  case Intrinsic::dx_sign:
     return true;
   }
   return false;
@@ -359,6 +361,39 @@ static Value *expandClampIntrinsic(CallInst *Orig,
                                  {MaxCall, Max}, nullptr, "dx.min");
 }
 
+/// Expand dx.sign(X) into zext(0 < X) - zext(X < 0), i.e. 1, 0 or -1 per
+/// element in the call's result type. Floating-point operands use ordered
+/// compares, so a NaN yields 0; integer operands are compared as signed.
+static Value *expandSignIntrinsic(CallInst *Orig) {
+  Value *X = Orig->getOperand(0);
+  Type *Ty = X->getType();
+  Type *ScalarTy = Ty->getScalarType();
+  Type *RetTy = Orig->getType();
+  Constant *Zero = Constant::getNullValue(Ty);
+
+  // Emit the expansion immediately before the original call.
+  IRBuilder<> Builder(Orig);
+
+  Value *GT;
+  Value *LT;
+  if (ScalarTy->isFloatingPointTy()) {
+    GT = Builder.CreateFCmpOLT(Zero, X);
+    LT = Builder.CreateFCmpOLT(X, Zero);
+  } else {
+    assert(ScalarTy->isIntegerTy() &&
+           "dx.sign expects an integer or floating-point operand");
+    GT = Builder.CreateICmpSLT(Zero, X);
+    LT = Builder.CreateICmpSLT(X, Zero);
+  }
+
+  // The compares produce i1 (or a vector of i1); widen both to the result
+  // type before subtracting so the difference can represent -1.
+  Value *ZextGT = Builder.CreateZExt(GT, RetTy);
+  Value *ZextLT = Builder.CreateZExt(LT, RetTy);
+
+  return Builder.CreateSub(ZextGT, ZextLT);
+}
+
 static bool expandIntrinsic(Function &F, CallInst *Orig) {
   Value *Result = nullptr;
   Intrinsic::ID IntrinsicId = F.getIntrinsicID();
@@ -402,6 +437,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
   case Intrinsic::dx_udot:
     Result = expandIntegerDotIntrinsic(Orig, IntrinsicId);
     break;
+  case Intrinsic::dx_sign:
+    Result = expandSignIntrinsic(Orig);
+    break;
   }
 
   if (Result) {
diff --git a/llvm/test/CodeGen/DirectX/sign.ll b/llvm/test/CodeGen/DirectX/sign.ll
new file mode 100644
index 00000000000000..2d9254a3abc77f
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/sign.ll
@@ -0,0 +1,216 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK
+
+
+define noundef i32 @sign_half(half noundef %a) {
+; CHECK-LABEL: define noundef i32 @sign_half(
+; CHECK-SAME: half noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = fcmp olt half 0xH0000, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt half [[A]], 0xH0000
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %elt.sign = call i32 @llvm.dx.sign.f16(half %a)
+  ret i32 %elt.sign
+}
+
+define noundef i32 @sign_float(float noundef %a) {
+; CHECK-LABEL: define noundef i32 @sign_float(
+; CHECK-SAME: float noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = fcmp olt float 0.000000e+00, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt float [[A]], 0.000000e+00
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %elt.sign = call i32 @llvm.dx.sign.f32(float %a)
+  ret i32 %elt.sign
+}
+
+define noundef i32 @sign_double(double noundef %a) {
+; CHECK-LABEL: define noundef i32 @sign_double(
+; CHECK-SAME: double noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = fcmp olt double 0.000000e+00, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt double [[A]], 0.000000e+00
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %elt.sign = call i32 @llvm.dx.sign.f64(double %a)
+  ret i32 %elt.sign
+}
+
+define noundef i32 @sign_i16(i16 noundef %a) {
+; CHECK-LABEL: define noundef i32 @sign_i16(
+; CHECK-SAME: i16 noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i16 0, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i16 [[A]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %elt.sign = call i32 @llvm.dx.sign.i16(i16 %a)
+  ret i32 %elt.sign
+}
+
+define noundef i32 @sign_i32(i32 noundef %a) {
+; CHECK-LABEL: define noundef i32 @sign_i32(
+; CHECK-SAME: i32 noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i32 0, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %elt.sign = call i32 @llvm.dx.sign.i32(i32 %a)
+  ret i32 %elt.sign
+}
+
+define noundef i32 @sign_i64(i64 noundef %a) {
+; CHECK-LABEL: define noundef i32 @sign_i64(
+; CHECK-SAME: i64 noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i64 0, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i64 [[A]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %elt.sign = call i32 @llvm.dx.sign.i64(i64 %a)
+  ret i32 %elt.sign
+}
+
+define noundef <4 x i32> @sign_half_vector(<4 x half> noundef %a) {
+; CHECK-LABEL: define noundef <4 x i32> @sign_half_vector(
+; CHECK-SAME: <4 x half> noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = fcmp olt <4 x half> zeroinitializer, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt <4 x half> [[A]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+;
+entry:
+  %elt.sign = call <4 x i32> @llvm.dx.sign.v4f16(<4 x half> %a)
+  ret <4 x i32> %elt.sign
+}
+
+define noundef <4 x i32> @sign_float_vector(<4 x float> noundef %a) {
+; CHECK-LABEL: define noundef <4 x i32> @sign_float_vector(
+; CHECK-SAME: <4 x float> noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = fcmp olt <4 x float> zeroinitializer, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt <4 x float> [[A]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+;
+entry:
+  %elt.sign = call <4 x i32> @llvm.dx.sign.v4f32(<4 x float> %a)
+  ret <4 x i32> %elt.sign
+}
+
+define noundef <4 x i32> @sign_double_vector(<4 x double> noundef %a) {
+; CHECK-LABEL: define noundef <4 x i32> @sign_double_vector(
+; CHECK-SAME: <4 x double> noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = fcmp olt <4 x double> zeroinitializer, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt <4 x double> [[A]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+;
+entry:
+  %elt.sign = call <4 x i32> @llvm.dx.sign.v4f64(<4 x double> %a)
+  ret <4 x i32> %elt.sign
+}
+
+define noundef <4 x i32> @sign_i16_vector(<4 x i16> noundef %a) {
+; CHECK-LABEL: define noundef <4 x i32> @sign_i16_vector(
+; CHECK-SAME: <4 x i16> noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <4 x i16> zeroinitializer, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i16> [[A]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+;
+entry:
+  %elt.sign = call <4 x i32> @llvm.dx.sign.v4i16(<4 x i16> %a)
+  ret <4 x i32> %elt.sign
+}
+
+define noundef <4 x i32> @sign_i32_vector(<4 x i32> noundef %a) {
+; CHECK-LABEL: define noundef <4 x i32> @sign_i32_vector(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <4 x i32> zeroinitializer, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[A]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+;
+entry:
+  %elt.sign = call <4 x i32> @llvm.dx.sign.v4i32(<4 x i32> %a)
+  ret <4 x i32> %elt.sign
+}
+
+define noundef <4 x i32> @sign_i64_vector(<4 x i64> noundef %a) {
+; CHECK-LABEL: define noundef <4 x i32> @sign_i64_vector(
+; CHECK-SAME: <4 x i64> noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <4 x i64> zeroinitializer, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i64> [[A]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+;
+entry:
+  %elt.sign = call <4 x i32> @llvm.dx.sign.v4i64(<4 x i64> %a)
+  ret <4 x i32> %elt.sign
+}
+
+
+declare i32 @llvm.dx.sign.f16(half)
+declare i32 @llvm.dx.sign.f32(float)
+declare i32 @llvm.dx.sign.f64(double)
+
+declare i32 @llvm.dx.sign.i16(i16)
+declare i32 @llvm.dx.sign.i32(i32)
+declare i32 @llvm.dx.sign.i64(i64)
+
+declare <4 x i32> @llvm.dx.sign.v4f16(<4 x half>)
+declare <4 x i32> @llvm.dx.sign.v4f32(<4 x float>)
+declare <4 x i32> @llvm.dx.sign.v4f64(<4 x double>)
+
+declare <4 x i32> @llvm.dx.sign.v4i16(<4 x i16>)
+declare <4 x i32> @llvm.dx.sign.v4i32(<4 x i32>)
+declare <4 x i32> @llvm.dx.sign.v4i64(<4 x i64>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; DOPCHECK: {{.*}}
+; EXPCHECK: {{.*}}