-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add step builtins and step HLSL function to DirectX and SPIR-V backend #106471
Conversation
@llvm/pr-subscribers-backend-spir-v @llvm/pr-subscribers-clang Author: Joshua Batista (bob80905) Changes: This PR adds the step intrinsic and an HLSL function that uses it. Used #102683 as a reference. Patch is 22.10 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/106471.diff 13 Files Affected:
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index ac33672a32b336..ba062d7b563749 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4751,6 +4751,12 @@ def HLSLSaturate : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
+def HLSLStep: LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_step"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "void(...)";
+}
+
// Builtins for XRay.
def XRayCustomEvent : Builtin {
let Spellings = ["__xray_customevent"];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 4204c8ff276ab1..2a6a3b7f4852fa 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18695,6 +18695,16 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0},
nullptr, "hlsl.saturate");
}
+ case Builtin::BI__builtin_hlsl_step: {
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
+ Value *Op1 = EmitScalarExpr(E->getArg(1));
+ assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
+ E->getArg(1)->getType()->hasFloatingRepresentation() &&
+ "step operands must have a float representation");
+ return Builder.CreateIntrinsic(
+ /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getStepIntrinsic(),
+ ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.step");
+ }
case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
return EmitRuntimeCall(CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy, {}, false), "__hlsl_wave_get_lane_index",
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 55a4b97c160cd6..9c50181ade0945 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -80,6 +80,7 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
GENERATE_HLSL_INTRINSIC_FUNCTION(Saturate, saturate)
+ GENERATE_HLSL_INTRINSIC_FUNCTION(Step, step)
GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id)
GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot)
GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot)
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 6d38b668fe770e..7ab7ea075c3109 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -1691,6 +1691,39 @@ float3 sqrt(float3);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt)
float4 sqrt(float4);
+//===----------------------------------------------------------------------===//
+// step builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T step(T y, T x)
+/// \brief Returns 1 if the x parameter is greater than or equal to the y
+/// parameter; otherwise, 0. \param y [in] The first floating-point value to
+/// compare. \param x [in] The second floating-point value to compare.
+///
+/// Step is based on the following formula: (x >= y) ? 1 : 0
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+half step(half, half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+half2 step(half2, half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+half3 step(half3, half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+half4 step(half4, half4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+float step(float, float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+float2 step(float2, float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+float3 step(float3, float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+float4 step(float4, float4);
+
//===----------------------------------------------------------------------===//
// tan builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 714e8f5cfa9926..70ca25687de2db 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -1490,6 +1490,18 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
TheCall->setType(ArgTyA);
break;
}
+ case Builtin::BI__builtin_hlsl_step: {
+ if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))
+ return true;
+ if (SemaRef.checkArgCount(TheCall, 2))
+ return true;
+
+ ExprResult A = TheCall->getArg(0);
+ QualType ArgTyA = A.get()->getType();
+ // return type is the same as the input type
+ TheCall->setType(ArgTyA);
+ break;
+ }
// Note these are llvm builtins that we want to catch invalid intrinsic
// generation. Normal handling of these builitns will occur elsewhere.
case Builtin::BI__builtin_elementwise_bitreverse: {
diff --git a/clang/test/CodeGenHLSL/builtins/step.hlsl b/clang/test/CodeGenHLSL/builtins/step.hlsl
new file mode 100644
index 00000000000000..43312716449902
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/step.hlsl
@@ -0,0 +1,100 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN: --check-prefixes=CHECK,DXIL_CHECK,DXIL_NATIVE_HALF,NATIVE_HALF
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN: -o - | FileCheck %s --check-prefixes=CHECK,DXIL_CHECK,NO_HALF,DXIL_NO_HALF
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN: --check-prefixes=CHECK,NATIVE_HALF,SPIR_NATIVE_HALF,SPIR_CHECK
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
+// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF,SPIR_NO_HALF,SPIR_CHECK
+
+// DXIL_NATIVE_HALF: define noundef half @
+// SPIR_NATIVE_HALF: define spir_func noundef half @
+// DXIL_NATIVE_HALF: call half @llvm.dx.step.f16(half
+// SPIR_NATIVE_HALF: call half @llvm.spv.step.f16(half
+// DXIL_NO_HALF: call float @llvm.dx.step.f32(float
+// SPIR_NO_HALF: call float @llvm.spv.step.f32(float
+// NATIVE_HALF: ret half
+// NO_HALF: ret float
+half test_step_half(half p0, half p1)
+{
+ return step(p0, p1);
+}
+// DXIL_NATIVE_HALF: define noundef <2 x half> @
+// SPIR_NATIVE_HALF: define spir_func noundef <2 x half> @
+// DXIL_NATIVE_HALF: call <2 x half> @llvm.dx.step.v2f16(<2 x half>
+// SPIR_NATIVE_HALF: call <2 x half> @llvm.spv.step.v2f16(<2 x half>
+// DXIL_NO_HALF: call <2 x float> @llvm.dx.step.v2f32(<2 x float>
+// SPIR_NO_HALF: call <2 x float> @llvm.spv.step.v2f32(<2 x float>
+// NATIVE_HALF: ret <2 x half> %hlsl.step
+// NO_HALF: ret <2 x float> %hlsl.step
+half2 test_step_half2(half2 p0, half2 p1)
+{
+ return step(p0, p1);
+}
+// DXIL_NATIVE_HALF: define noundef <3 x half> @
+// SPIR_NATIVE_HALF: define spir_func noundef <3 x half> @
+// DXIL_NATIVE_HALF: call <3 x half> @llvm.dx.step.v3f16(<3 x half>
+// SPIR_NATIVE_HALF: call <3 x half> @llvm.spv.step.v3f16(<3 x half>
+// DXIL_NO_HALF: call <3 x float> @llvm.dx.step.v3f32(<3 x float>
+// SPIR_NO_HALF: call <3 x float> @llvm.spv.step.v3f32(<3 x float>
+// NATIVE_HALF: ret <3 x half> %hlsl.step
+// NO_HALF: ret <3 x float> %hlsl.step
+half3 test_step_half3(half3 p0, half3 p1)
+{
+ return step(p0, p1);
+}
+// DXIL_NATIVE_HALF: define noundef <4 x half> @
+// SPIR_NATIVE_HALF: define spir_func noundef <4 x half> @
+// DXIL_NATIVE_HALF: call <4 x half> @llvm.dx.step.v4f16(<4 x half>
+// SPIR_NATIVE_HALF: call <4 x half> @llvm.spv.step.v4f16(<4 x half>
+// DXIL_NO_HALF: call <4 x float> @llvm.dx.step.v4f32(<4 x float>
+// SPIR_NO_HALF: call <4 x float> @llvm.spv.step.v4f32(<4 x float>
+// NATIVE_HALF: ret <4 x half> %hlsl.step
+// NO_HALF: ret <4 x float> %hlsl.step
+half4 test_step_half4(half4 p0, half4 p1)
+{
+ return step(p0, p1);
+}
+
+// DXIL_CHECK: define noundef float @
+// SPIR_CHECK: define spir_func noundef float @
+// DXIL_CHECK: call float @llvm.dx.step.f32(float
+// SPIR_CHECK: call float @llvm.spv.step.f32(float
+// CHECK: ret float
+float test_step_float(float p0, float p1)
+{
+ return step(p0, p1);
+}
+// DXIL_CHECK: define noundef <2 x float> @
+// SPIR_CHECK: define spir_func noundef <2 x float> @
+// DXIL_CHECK: %hlsl.step = call <2 x float> @llvm.dx.step.v2f32(
+// SPIR_CHECK: %hlsl.step = call <2 x float> @llvm.spv.step.v2f32(<2 x float>
+// CHECK: ret <2 x float> %hlsl.step
+float2 test_step_float2(float2 p0, float2 p1)
+{
+ return step(p0, p1);
+}
+// DXIL_CHECK: define noundef <3 x float> @
+// SPIR_CHECK: define spir_func noundef <3 x float> @
+// DXIL_CHECK: %hlsl.step = call <3 x float> @llvm.dx.step.v3f32(
+// SPIR_CHECK: %hlsl.step = call <3 x float> @llvm.spv.step.v3f32(<3 x float>
+// CHECK: ret <3 x float> %hlsl.step
+float3 test_step_float3(float3 p0, float3 p1)
+{
+ return step(p0, p1);
+}
+// DXIL_CHECK: define noundef <4 x float> @
+// SPIR_CHECK: define spir_func noundef <4 x float> @
+// DXIL_CHECK: %hlsl.step = call <4 x float> @llvm.dx.step.v4f32(
+// SPIR_CHECK: %hlsl.step = call <4 x float> @llvm.spv.step.v4f32(
+// CHECK: ret <4 x float> %hlsl.step
+float4 test_step_float4(float4 p0, float4 p1)
+{
+ return step(p0, p1);
+}
\ No newline at end of file
diff --git a/clang/test/SemaHLSL/BuiltIns/step-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/step-errors.hlsl
new file mode 100644
index 00000000000000..ccd21847f2f367
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/step-errors.hlsl
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify -verify-ignore-unexpected
+
+void test_too_few_arg()
+{
+ return __builtin_hlsl_step();
+ // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
+}
+
+void test_too_many_arg(float2 p0)
+{
+ return __builtin_hlsl_step(p0, p0, p0);
+ // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+}
+
+bool builtin_bool_to_float_type_promotion(bool p1)
+{
+ return __builtin_hlsl_step(p1, p1);
+ // expected-error@-1 {{passing 'bool' to parameter of incompatible type 'float'}}
+}
+
+bool builtin_step_int_to_float_promotion(int p1)
+{
+ return __builtin_hlsl_step(p1, p1);
+ // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
+}
+
+bool2 builtin_step_int2_to_float2_promotion(int2 p1)
+{
+ return __builtin_hlsl_step(p1, p1);
+ // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
+}
\ No newline at end of file
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 32af50b25f3904..741f1bb6cbe3cc 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -79,4 +79,5 @@ def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLV
def int_dx_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>;
def int_dx_rcp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_dx_rsqrt : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>]>;
}
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 63d9ba43a1183b..5ceaed7ee711fb 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -67,6 +67,7 @@ let TargetPrefix = "spv" in {
def int_spv_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>;
def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>;
def int_spv_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_spv_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [LLVMMatchType<0>, llvm_anyfloat_ty]>;
def int_spv_fdot :
DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
[llvm_anyfloat_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 2daa4f825c3b25..a04d6acc1a1352 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -48,6 +48,7 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::dx_fdot:
case Intrinsic::dx_sdot:
case Intrinsic::dx_udot:
+ case Intrinsic::dx_step:
return true;
}
return false;
@@ -320,6 +321,28 @@ static Value *expandPowIntrinsic(CallInst *Orig) {
return Exp2Call;
}
+static Value *expandStepIntrinsic(CallInst *Orig) {
+
+ Value *X = Orig->getOperand(0);
+ Value *Y = Orig->getOperand(1);
+ Type *Ty = X->getType();
+ IRBuilder<> Builder(Orig);
+
+ Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0);
+ Constant *zero = ConstantFP::get(Ty->getScalarType(), 0.0);
+ Value *cond = Builder.CreateFCmpOLT(Y, X);
+
+ if (Ty != Ty->getScalarType()) {
+ auto *XVec = dyn_cast<FixedVectorType>(Ty);
+ one = ConstantVector::getSplat(
+ ElementCount::getFixed(XVec->getNumElements()), one);
+ zero = ConstantVector::getSplat(
+ ElementCount::getFixed(XVec->getNumElements()), zero);
+ }
+
+ return Builder.CreateSelect(cond, zero, one);
+}
+
static Intrinsic::ID getMaxForClamp(Type *ElemTy,
Intrinsic::ID ClampIntrinsic) {
if (ClampIntrinsic == Intrinsic::dx_uclamp)
@@ -402,6 +425,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
case Intrinsic::dx_udot:
Result = expandIntegerDotIntrinsic(Orig, IntrinsicId);
break;
+ case Intrinsic::dx_step:
+ Result = expandStepIntrinsic(Orig);
+ break;
}
if (Result) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 9e10d947081cc3..23cb42fb97130f 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -259,6 +259,9 @@ class SPIRVInstructionSelector : public InstructionSelector {
bool selectSpvThreadId(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I) const;
+ bool selectStep(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
bool selectUnmergeValues(MachineInstr &I) const;
Register buildI32Constant(uint32_t Val, MachineInstr &I,
@@ -1603,6 +1606,25 @@ bool SPIRVInstructionSelector::selectSaturate(Register ResVReg,
.constrainAllUses(TII, TRI, RBI);
}
+bool SPIRVInstructionSelector::selectStep(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+
+ assert(I.getNumOperands() == 4);
+ assert(I.getOperand(2).isReg());
+ assert(I.getOperand(3).isReg());
+ MachineBasicBlock &BB = *I.getParent();
+
+ return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
+ .addImm(GL::Step)
+ .addUse(I.getOperand(2).getReg())
+ .addUse(I.getOperand(3).getReg())
+ .constrainAllUses(TII, TRI, RBI);
+}
+
bool SPIRVInstructionSelector::selectBitreverse(Register ResVReg,
const SPIRVType *ResType,
MachineInstr &I) const {
@@ -2351,6 +2373,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
} break;
case Intrinsic::spv_saturate:
return selectSaturate(ResVReg, ResType, I);
+ case Intrinsic::spv_step:
+ return selectStep(ResVReg, ResType, I);
default: {
std::string DiagMsg;
raw_string_ostream OS(DiagMsg);
diff --git a/llvm/test/CodeGen/DirectX/step.ll b/llvm/test/CodeGen/DirectX/step.ll
new file mode 100644
index 00000000000000..9a25a371f6efd0
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/step.ll
@@ -0,0 +1,79 @@
+; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s --check-prefix=CHECK
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefix=CHECK
+
+; Make sure dxil operation function calls for step are generated for half/float.
+
+declare half @llvm.dx.step.f16(half, half)
+declare <2 x half> @llvm.dx.step.v2f16(<2 x half>, <2 x half>)
+declare <3 x half> @llvm.dx.step.v3f16(<3 x half>, <3 x half>)
+declare <4 x half> @llvm.dx.step.v4f16(<4 x half>, <4 x half>)
+
+declare float @llvm.dx.step.f32(float, float)
+declare <2 x float> @llvm.dx.step.v2f32(<2 x float>, <2 x float>)
+declare <3 x float> @llvm.dx.step.v3f32(<3 x float>, <3 x float>)
+declare <4 x float> @llvm.dx.step.v4f32(<4 x float>, <4 x float>)
+
+define noundef half @test_step_half(half noundef %p0, half noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt half %p1, %p0
+ ; CHECK: %1 = select i1 %0, half 0xH0000, half 0xH3C00
+ ; DOPCHECK: asdahg
+ %hlsl.step = call half @llvm.dx.step.f16(half %p0, half %p1)
+ ret half %hlsl.step
+}
+
+define noundef <2 x half> @test_step_half2(<2 x half> noundef %p0, <2 x half> noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt <2 x half> %p1, %p0
+ ; CHECK: %1 = select <2 x i1> %0, <2 x half> zeroinitializer, <2 x half> <half 0xH3C00, half 0xH3C00>
+ %hlsl.step = call <2 x half> @llvm.dx.step.v2f16(<2 x half> %p0, <2 x half> %p1)
+ ret <2 x half> %hlsl.step
+}
+
+define noundef <3 x half> @test_step_half3(<3 x half> noundef %p0, <3 x half> noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt <3 x half> %p1, %p0
+ ; CHECK: %1 = select <3 x i1> %0, <3 x half> zeroinitializer, <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>
+ %hlsl.step = call <3 x half> @llvm.dx.step.v3f16(<3 x half> %p0, <3 x half> %p1)
+ ret <3 x half> %hlsl.step
+}
+
+define noundef <4 x half> @test_step_half4(<4 x half> noundef %p0, <4 x half> noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt <4 x half> %p1, %p0
+ ; CHECK: %1 = select <4 x i1> %0, <4 x half> zeroinitializer, <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>
+ %hlsl.step = call <4 x half> @llvm.dx.step.v4f16(<4 x half> %p0, <4 x half> %p1)
+ ret <4 x half> %hlsl.step
+}
+
+define noundef float @test_step_float(float noundef %p0, float noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt float %p1, %p0
+ ; CHECK: %1 = select i1 %0, float 0.000000e+00, float 1.000000e+00
+ %hlsl.step = call float @llvm.dx.step.f32(float %p0, float %p1)
+ ret float %hlsl.step
+}
+
+define noundef <2 x float> @test_step_float2(<2 x float> noundef %p0, <2 x float> noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt <2 x float> %p1, %p0
+ ; CHECK: %1 = select <2 x i1> %0, <2 x float> zeroinitializer, <2 x float> <float 1.000000e+00, float 1.000000e+00>
+ %hlsl.step = call <2 x float> @llvm.dx.step.v2f32(<2 x float> %p0, <2 x float> %p1)
+ ret <2 x float> %hlsl.step
+}
+
+define noundef <3 x float> @test_step_float3(<3 x float> noundef %p0, <3 x float> noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt <3 x float> %p1, %p0
+ ; CHECK: %1 = select <3 x i1> %0, <3 x float> zeroinitializer, <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+ %hlsl.step = call <3 x float> @llvm.dx.step.v3f32(<3 x float> %p0, <3 x float> %p1)
+ ret <3 x float> %hlsl.step
+}
+
+define noundef <4 x float> @test_step_float4(<4 x float> noundef %p0, <4 x float> noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt <4 x float> %p1, %p0
+ ; CHECK: %1 = select <4 x i1> %0, <4 x float> zeroinitializer, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1...
[truncated]
|
// DXIL_NATIVE_HALF: define noundef half @ | ||
// SPIR_NATIVE_HALF: define spir_func noundef half @ | ||
// DXIL_NATIVE_HALF: call half @llvm.dx.step.f16(half | ||
// SPIR_NATIVE_HALF: call half @llvm.spv.step.f16(half | ||
// DXIL_NO_HALF: call float @llvm.dx.step.f32(float | ||
// SPIR_NO_HALF: call float @llvm.spv.step.f32(float | ||
// NATIVE_HALF: ret half | ||
// NO_HALF: ret float | ||
half test_step_half(half p0, half p1) | ||
{ | ||
return step(p0, p1); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The in-tree builtin tests were updated recently to clean up a bunch of the differences between DXIL and SPIRV (See #105930). Could you please update this test to that newer style?
Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0); | ||
Constant *zero = ConstantFP::get(Ty->getScalarType(), 0.0); | ||
Value *cond = Builder.CreateFCmpOLT(Y, X); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Style nit: Variables should start with capital letters (One, Zero, Cond)
llvm/test/CodeGen/DirectX/step.ll
Outdated
entry: | ||
; CHECK: %0 = fcmp olt half %p1, %p0 | ||
; CHECK: %1 = select i1 %0, half 0xH0000, half 0xH3C00 | ||
; DOPCHECK: asdahg |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Typo / something left over from debugging?
} | ||
|
||
declare <4 x half> @llvm.spv.step.v4f16(<4 x half>, <4 x half>) | ||
declare <4 x float> @llvm.spv.step.v4f32(<4 x float>, <4 x float>) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add a newline at the end of the file
case Builtin::BI__builtin_hlsl_step: { | ||
if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall)) | ||
return true; | ||
if (SemaRef.checkArgCount(TheCall, 2)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
NIT: we usually check the arg count first.
{ | ||
return __builtin_hlsl_step(p1, p1); | ||
// expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add a new line
✅ With the latest revision this PR passed the C/C++ code formatter. |
…end (llvm#109180) This PR adds the step intrinsic and an HLSL function that uses it. The SPIRV backend is also implemented. Used llvm#106471 as a reference. Fixes llvm#99095
This PR adds the step intrinsic and an HLSL function that uses it.
The SPIRV backend is also implemented.
Used #102683 as a reference.
Fixes #99157