Add step builtins and step HLSL function to DirectX and SPIR-V backend #106471

bob80905 · 2024-08-28T23:37:47Z

This PR adds the step intrinsic and an HLSL function that uses it.
The SPIRV backend is also implemented.

Used #102683 as a reference.
Fixes #99157

llvmbot · 2024-08-28T23:38:21Z

@llvm/pr-subscribers-backend-spir-v
@llvm/pr-subscribers-hlsl
@llvm/pr-subscribers-llvm-ir
@llvm/pr-subscribers-backend-directx

@llvm/pr-subscribers-clang

Author: Joshua Batista (bob80905)

Changes

This PR adds the step intrinsic and an HLSL function that uses it.
The SPIRV backend is also implemented.

Used #102683 as a reference.
Fixes #99157

Patch is 22.10 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/106471.diff

13 Files Affected:

(modified) clang/include/clang/Basic/Builtins.td (+6)
(modified) clang/lib/CodeGen/CGBuiltin.cpp (+10)
(modified) clang/lib/CodeGen/CGHLSLRuntime.h (+1)
(modified) clang/lib/Headers/hlsl/hlsl_intrinsics.h (+33)
(modified) clang/lib/Sema/SemaHLSL.cpp (+12)
(added) clang/test/CodeGenHLSL/builtins/step.hlsl (+100)
(added) clang/test/SemaHLSL/BuiltIns/step-errors.hlsl (+31)
(modified) llvm/include/llvm/IR/IntrinsicsDirectX.td (+1)
(modified) llvm/include/llvm/IR/IntrinsicsSPIRV.td (+1)
(modified) llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp (+26)
(modified) llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp (+24)
(added) llvm/test/CodeGen/DirectX/step.ll (+79)
(added) llvm/test/CodeGen/SPIRV/hlsl-intrinsics/step.ll (+33)

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index ac33672a32b336..ba062d7b563749 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4751,6 +4751,12 @@ def HLSLSaturate : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLStep: LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_step"];
+  let Attributes = [NoThrow, Const];
+  let Prototype = "void(...)";
+}
+
 // Builtins for XRay.
 def XRayCustomEvent : Builtin {
   let Spellings = ["__xray_customevent"];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 4204c8ff276ab1..2a6a3b7f4852fa 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18695,6 +18695,16 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
         CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0},
         nullptr, "hlsl.saturate");
   }
+  case Builtin::BI__builtin_hlsl_step: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    Value *Op1 = EmitScalarExpr(E->getArg(1));
+    assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
+           E->getArg(1)->getType()->hasFloatingRepresentation() &&
+           "step operands must have a float representation");
+    return Builder.CreateIntrinsic(
+        /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getStepIntrinsic(),
+        ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.step");
+  }
   case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
     return EmitRuntimeCall(CGM.CreateRuntimeFunction(
         llvm::FunctionType::get(IntTy, {}, false), "__hlsl_wave_get_lane_index",
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 55a4b97c160cd6..9c50181ade0945 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -80,6 +80,7 @@ class CGHLSLRuntime {
   GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Saturate, saturate)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(Step, step)
   GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id)
   GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot)
   GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot)
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 6d38b668fe770e..7ab7ea075c3109 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -1691,6 +1691,39 @@ float3 sqrt(float3);
 _HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt)
 float4 sqrt(float4);
 
+//===----------------------------------------------------------------------===//
+// step builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T step(T x, T y)
+/// \brief Returns 1 if the x parameter is greater than or equal to the y
+/// parameter; otherwise, 0. vector. \param x [in] The first floating-point
+/// value to compare. \param y [in] The first floating-point value to compare.
+///
+/// Step is based on the following formula: (x >= y) ? 1 : 0
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+half step(half, half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+half2 step(half2, half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+half3 step(half3, half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+half4 step(half4, half4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+float step(float, float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+float2 step(float2, float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+float3 step(float3, float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step)
+float4 step(float4, float4);
+
 //===----------------------------------------------------------------------===//
 // tan builtins
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 714e8f5cfa9926..70ca25687de2db 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -1490,6 +1490,18 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     TheCall->setType(ArgTyA);
     break;
   }
+  case Builtin::BI__builtin_hlsl_step: {
+    if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))
+      return true;
+    if (SemaRef.checkArgCount(TheCall, 2))
+      return true;
+
+    ExprResult A = TheCall->getArg(0);
+    QualType ArgTyA = A.get()->getType();
+    // return type is the same as the input type
+    TheCall->setType(ArgTyA);
+    break;
+  }
   // Note these are llvm builtins that we want to catch invalid intrinsic
   // generation. Normal handling of these builitns will occur elsewhere.
   case Builtin::BI__builtin_elementwise_bitreverse: {
diff --git a/clang/test/CodeGenHLSL/builtins/step.hlsl b/clang/test/CodeGenHLSL/builtins/step.hlsl
new file mode 100644
index 00000000000000..43312716449902
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/step.hlsl
@@ -0,0 +1,100 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN:   --check-prefixes=CHECK,DXIL_CHECK,DXIL_NATIVE_HALF,NATIVE_HALF
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,DXIL_CHECK,NO_HALF,DXIL_NO_HALF
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF,SPIR_NATIVE_HALF,SPIR_CHECK
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF,SPIR_NO_HALF,SPIR_CHECK
+
+// DXIL_NATIVE_HALF: define noundef half @
+// SPIR_NATIVE_HALF: define spir_func noundef half @
+// DXIL_NATIVE_HALF: call half @llvm.dx.step.f16(half
+// SPIR_NATIVE_HALF: call half @llvm.spv.step.f16(half
+// DXIL_NO_HALF: call float @llvm.dx.step.f32(float
+// SPIR_NO_HALF: call float @llvm.spv.step.f32(float
+// NATIVE_HALF: ret half
+// NO_HALF: ret float
+half test_step_half(half p0, half p1)
+{
+    return step(p0, p1);
+}
+// DXIL_NATIVE_HALF: define noundef <2 x half> @
+// SPIR_NATIVE_HALF: define spir_func noundef <2 x half> @
+// DXIL_NATIVE_HALF: call <2 x half> @llvm.dx.step.v2f16(<2 x half>
+// SPIR_NATIVE_HALF: call <2 x half> @llvm.spv.step.v2f16(<2 x half>
+// DXIL_NO_HALF: call <2 x float> @llvm.dx.step.v2f32(<2 x float>
+// SPIR_NO_HALF: call <2 x float> @llvm.spv.step.v2f32(<2 x float>
+// NATIVE_HALF: ret <2 x half> %hlsl.step
+// NO_HALF: ret <2 x float> %hlsl.step
+half2 test_step_half2(half2 p0, half2 p1)
+{
+    return step(p0, p1);
+}
+// DXIL_NATIVE_HALF: define noundef <3 x half> @
+// SPIR_NATIVE_HALF: define spir_func noundef <3 x half> @
+// DXIL_NATIVE_HALF: call <3 x half> @llvm.dx.step.v3f16(<3 x half>
+// SPIR_NATIVE_HALF: call <3 x half> @llvm.spv.step.v3f16(<3 x half>
+// DXIL_NO_HALF: call <3 x float> @llvm.dx.step.v3f32(<3 x float>
+// SPIR_NO_HALF: call <3 x float> @llvm.spv.step.v3f32(<3 x float>
+// NATIVE_HALF: ret <3 x half> %hlsl.step
+// NO_HALF: ret <3 x float> %hlsl.step
+half3 test_step_half3(half3 p0, half3 p1)
+{
+    return step(p0, p1);
+}
+// DXIL_NATIVE_HALF: define noundef <4 x half> @
+// SPIR_NATIVE_HALF: define spir_func noundef <4 x half> @
+// DXIL_NATIVE_HALF: call <4 x half> @llvm.dx.step.v4f16(<4 x half>
+// SPIR_NATIVE_HALF: call <4 x half> @llvm.spv.step.v4f16(<4 x half>
+// DXIL_NO_HALF: call <4 x float> @llvm.dx.step.v4f32(<4 x float>
+// SPIR_NO_HALF: call <4 x float> @llvm.spv.step.v4f32(<4 x float>
+// NATIVE_HALF: ret <4 x half> %hlsl.step
+// NO_HALF: ret <4 x float> %hlsl.step
+half4 test_step_half4(half4 p0, half4 p1)
+{
+    return step(p0, p1);
+}
+
+// DXIL_CHECK: define noundef float @
+// SPIR_CHECK: define spir_func noundef float @
+// DXIL_CHECK: call float @llvm.dx.step.f32(float
+// SPIR_CHECK: call float @llvm.spv.step.f32(float
+// CHECK: ret float
+float test_step_float(float p0, float p1)
+{
+    return step(p0, p1);
+}
+// DXIL_CHECK: define noundef <2 x float> @
+// SPIR_CHECK: define spir_func noundef <2 x float> @
+// DXIL_CHECK: %hlsl.step = call <2 x float> @llvm.dx.step.v2f32(
+// SPIR_CHECK: %hlsl.step = call <2 x float> @llvm.spv.step.v2f32(<2 x float>
+// CHECK: ret <2 x float> %hlsl.step
+float2 test_step_float2(float2 p0, float2 p1)
+{
+    return step(p0, p1);
+}
+// DXIL_CHECK: define noundef <3 x float> @
+// SPIR_CHECK: define spir_func noundef <3 x float> @
+// DXIL_CHECK: %hlsl.step = call <3 x float> @llvm.dx.step.v3f32(
+// SPIR_CHECK: %hlsl.step = call <3 x float> @llvm.spv.step.v3f32(<3 x float>
+// CHECK: ret <3 x float> %hlsl.step
+float3 test_step_float3(float3 p0, float3 p1)
+{
+    return step(p0, p1);
+}
+// DXIL_CHECK: define noundef <4 x float> @
+// SPIR_CHECK: define spir_func noundef <4 x float> @
+// DXIL_CHECK: %hlsl.step = call <4 x float> @llvm.dx.step.v4f32(
+// SPIR_CHECK: %hlsl.step = call <4 x float> @llvm.spv.step.v4f32(
+// CHECK: ret <4 x float> %hlsl.step
+float4 test_step_float4(float4 p0, float4 p1)
+{
+    return step(p0, p1);
+}
\ No newline at end of file
diff --git a/clang/test/SemaHLSL/BuiltIns/step-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/step-errors.hlsl
new file mode 100644
index 00000000000000..ccd21847f2f367
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/step-errors.hlsl
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify -verify-ignore-unexpected
+
+void test_too_few_arg()
+{
+  return __builtin_hlsl_step();
+  // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
+}
+
+void test_too_many_arg(float2 p0)
+{
+  return __builtin_hlsl_step(p0, p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+}
+
+bool builtin_bool_to_float_type_promotion(bool p1)
+{
+  return __builtin_hlsl_step(p1, p1);
+  // expected-error@-1 {passing 'bool' to parameter of incompatible type 'float'}}
+}
+
+bool builtin_step_int_to_float_promotion(int p1)
+{
+  return __builtin_hlsl_step(p1, p1);
+  // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
+}
+
+bool2 builtin_step_int2_to_float2_promotion(int2 p1)
+{
+  return __builtin_hlsl_step(p1, p1);
+  // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
+}
\ No newline at end of file
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 32af50b25f3904..741f1bb6cbe3cc 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -79,4 +79,5 @@ def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLV
 def int_dx_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>;
 def int_dx_rcp  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
 def int_dx_rsqrt  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>]>;
 }
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 63d9ba43a1183b..5ceaed7ee711fb 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -67,6 +67,7 @@ let TargetPrefix = "spv" in {
   def int_spv_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>;
   def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>;
   def int_spv_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+  def int_spv_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [LLVMMatchType<0>, llvm_anyfloat_ty]>;
   def int_spv_fdot :
     DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
     [llvm_anyfloat_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 2daa4f825c3b25..a04d6acc1a1352 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -48,6 +48,7 @@ static bool isIntrinsicExpansion(Function &F) {
   case Intrinsic::dx_fdot:
   case Intrinsic::dx_sdot:
   case Intrinsic::dx_udot:
+  case Intrinsic::dx_step:
     return true;
   }
   return false;
@@ -320,6 +321,28 @@ static Value *expandPowIntrinsic(CallInst *Orig) {
   return Exp2Call;
 }
 
+static Value *expandStepIntrinsic(CallInst *Orig) {
+
+  Value *X = Orig->getOperand(0);
+  Value *Y = Orig->getOperand(1);
+  Type *Ty = X->getType();
+  IRBuilder<> Builder(Orig);
+
+  Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0);
+  Constant *zero = ConstantFP::get(Ty->getScalarType(), 0.0);
+  Value *cond = Builder.CreateFCmpOLT(Y, X);
+
+  if (Ty != Ty->getScalarType()) {
+    auto *XVec = dyn_cast<FixedVectorType>(Ty);
+    one = ConstantVector::getSplat(
+        ElementCount::getFixed(XVec->getNumElements()), one);
+    zero = ConstantVector::getSplat(
+        ElementCount::getFixed(XVec->getNumElements()), zero);
+  }
+
+  return Builder.CreateSelect(cond, zero, one);
+}
+
 static Intrinsic::ID getMaxForClamp(Type *ElemTy,
                                     Intrinsic::ID ClampIntrinsic) {
   if (ClampIntrinsic == Intrinsic::dx_uclamp)
@@ -402,6 +425,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
   case Intrinsic::dx_udot:
     Result = expandIntegerDotIntrinsic(Orig, IntrinsicId);
     break;
+  case Intrinsic::dx_step:
+    Result = expandStepIntrinsic(Orig);
+    break;
   }
 
   if (Result) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 9e10d947081cc3..23cb42fb97130f 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -259,6 +259,9 @@ class SPIRVInstructionSelector : public InstructionSelector {
   bool selectSpvThreadId(Register ResVReg, const SPIRVType *ResType,
                          MachineInstr &I) const;
 
+  bool selectStep(Register ResVReg, const SPIRVType *ResType,
+                  MachineInstr &I) const;
+
   bool selectUnmergeValues(MachineInstr &I) const;
 
   Register buildI32Constant(uint32_t Val, MachineInstr &I,
@@ -1603,6 +1606,25 @@ bool SPIRVInstructionSelector::selectSaturate(Register ResVReg,
       .constrainAllUses(TII, TRI, RBI);
 }
 
+bool SPIRVInstructionSelector::selectStep(Register ResVReg,
+                                          const SPIRVType *ResType,
+                                          MachineInstr &I) const {
+
+  assert(I.getNumOperands() == 4);
+  assert(I.getOperand(2).isReg());
+  assert(I.getOperand(3).isReg());
+  MachineBasicBlock &BB = *I.getParent();
+
+  return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
+      .addDef(ResVReg)
+      .addUse(GR.getSPIRVTypeID(ResType))
+      .addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
+      .addImm(GL::Step)
+      .addUse(I.getOperand(2).getReg())
+      .addUse(I.getOperand(3).getReg())
+      .constrainAllUses(TII, TRI, RBI);
+}
+
 bool SPIRVInstructionSelector::selectBitreverse(Register ResVReg,
                                                 const SPIRVType *ResType,
                                                 MachineInstr &I) const {
@@ -2351,6 +2373,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
   } break;
   case Intrinsic::spv_saturate:
     return selectSaturate(ResVReg, ResType, I);
+  case Intrinsic::spv_step:
+    return selectStep(ResVReg, ResType, I);
   default: {
     std::string DiagMsg;
     raw_string_ostream OS(DiagMsg);
diff --git a/llvm/test/CodeGen/DirectX/step.ll b/llvm/test/CodeGen/DirectX/step.ll
new file mode 100644
index 00000000000000..9a25a371f6efd0
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/step.ll
@@ -0,0 +1,79 @@
+; RUN: opt -S  -dxil-intrinsic-expansion  < %s | FileCheck %s --check-prefix=CHECK
+; RUN: opt -S  -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefix=CHECK
+
+; Make sure dxil operation function calls for step are generated for half/float.
+
+declare half @llvm.dx.step.f16(half, half)
+declare <2 x half> @llvm.dx.step.v2f16(<2 x half>, <2 x half>)
+declare <3 x half> @llvm.dx.step.v3f16(<3 x half>, <3 x half>)
+declare <4 x half> @llvm.dx.step.v4f16(<4 x half>, <4 x half>)
+
+declare float @llvm.dx.step.f32(float, float)
+declare <2 x float> @llvm.dx.step.v2f32(<2 x float>, <2 x float>)
+declare <3 x float> @llvm.dx.step.v3f32(<3 x float>, <3 x float>)
+declare <4 x float> @llvm.dx.step.v4f32(<4 x float>, <4 x float>)
+
+define noundef half @test_step_half(half noundef %p0, half noundef %p1) {
+entry:
+  ; CHECK: %0 = fcmp olt half %p1, %p0
+  ; CHECK: %1 = select i1 %0, half 0xH0000, half 0xH3C00
+  ; DOPCHECK: asdahg
+  %hlsl.step = call half @llvm.dx.step.f16(half %p0, half %p1)
+  ret half %hlsl.step
+}
+
+define noundef <2 x half> @test_step_half2(<2 x half> noundef %p0, <2 x half> noundef %p1) {
+entry:
+  ; CHECK: %0 = fcmp olt <2 x half> %p1, %p0
+  ; CHECK: %1 = select <2 x i1> %0, <2 x half> zeroinitializer, <2 x half> <half 0xH3C00, half 0xH3C00>
+  %hlsl.step = call <2 x half> @llvm.dx.step.v2f16(<2 x half> %p0, <2 x half> %p1)
+  ret <2 x half> %hlsl.step
+}
+
+define noundef <3 x half> @test_step_half3(<3 x half> noundef %p0, <3 x half> noundef %p1) {
+entry:
+  ; CHECK: %0 = fcmp olt <3 x half> %p1, %p0
+  ; CHECK: %1 = select <3 x i1> %0, <3 x half> zeroinitializer, <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>
+  %hlsl.step = call <3 x half> @llvm.dx.step.v3f16(<3 x half> %p0, <3 x half> %p1)
+  ret <3 x half> %hlsl.step
+}
+
+define noundef <4 x half> @test_step_half4(<4 x half> noundef %p0, <4 x half> noundef %p1) {
+entry:
+  ; CHECK: %0 = fcmp olt <4 x half> %p1, %p0
+  ; CHECK: %1 = select <4 x i1> %0, <4 x half> zeroinitializer, <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>
+  %hlsl.step = call <4 x half> @llvm.dx.step.v4f16(<4 x half> %p0, <4 x half> %p1)
+  ret <4 x half> %hlsl.step
+}
+
+define noundef float @test_step_float(float noundef %p0, float noundef %p1) {
+entry:
+  ; CHECK: %0 = fcmp olt float %p1, %p0
+  ; CHECK: %1 = select i1 %0, float 0.000000e+00, float 1.000000e+00
+  %hlsl.step = call float @llvm.dx.step.f32(float %p0, float %p1)
+  ret float %hlsl.step
+}
+
+define noundef <2 x float> @test_step_float2(<2 x float> noundef %p0, <2 x float> noundef %p1) {
+entry:
+  ; CHECK: %0 = fcmp olt <2 x float> %p1, %p0
+  ; CHECK: %1 = select <2 x i1> %0, <2 x float> zeroinitializer, <2 x float> <float 1.000000e+00, float 1.000000e+00>
+  %hlsl.step = call <2 x float> @llvm.dx.step.v2f32(<2 x float> %p0, <2 x float> %p1)
+  ret <2 x float> %hlsl.step
+}
+
+define noundef <3 x float> @test_step_float3(<3 x float> noundef %p0, <3 x float> noundef %p1) {
+entry:
+  ; CHECK: %0 = fcmp olt <3 x float> %p1, %p0
+  ; CHECK: %1 = select <3 x i1> %0, <3 x float> zeroinitializer, <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  %hlsl.step = call <3 x float> @llvm.dx.step.v3f32(<3 x float> %p0, <3 x float> %p1)
+  ret <3 x float> %hlsl.step
+}
+
+define noundef <4 x float> @test_step_float4(<4 x float> noundef %p0, <4 x float> noundef %p1) {
+entry:
+  ; CHECK: %0 = fcmp olt <4 x float> %p1, %p0
+  ; CHECK: %1 = select <4 x i1> %0, <4 x float> zeroinitializer, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1...
[truncated]

bogner · 2024-09-06T22:52:15Z

clang/test/CodeGenHLSL/builtins/step.hlsl

+// DXIL_NATIVE_HALF: define noundef half @
+// SPIR_NATIVE_HALF: define spir_func noundef half @
+// DXIL_NATIVE_HALF: call half @llvm.dx.step.f16(half
+// SPIR_NATIVE_HALF: call half @llvm.spv.step.f16(half
+// DXIL_NO_HALF: call float @llvm.dx.step.f32(float
+// SPIR_NO_HALF: call float @llvm.spv.step.f32(float
+// NATIVE_HALF: ret half
+// NO_HALF: ret float
+half test_step_half(half p0, half p1)
+{
+    return step(p0, p1);
+}


The in-tree builtin tests were updated recently to clean up a bunch of the differences between DXIL and SPIRV (See #105930). Could you please update this test to that newer style?

bogner · 2024-09-06T22:52:57Z

llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp

+  Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0);
+  Constant *zero = ConstantFP::get(Ty->getScalarType(), 0.0);
+  Value *cond = Builder.CreateFCmpOLT(Y, X);


Style nit: Variables should start with capital letters (One, Zero, Cond)

bogner · 2024-09-06T22:54:35Z

llvm/test/CodeGen/DirectX/step.ll

+entry:
+  ; CHECK: %0 = fcmp olt half %p1, %p0
+  ; CHECK: %1 = select i1 %0, half 0xH0000, half 0xH3C00
+  ; DOPCHECK: asdahg


Typo / something left over from debugging?

bogner · 2024-09-06T22:54:59Z

llvm/test/CodeGen/SPIRV/hlsl-intrinsics/step.ll

+}
+
+declare <4 x half> @llvm.spv.step.v4f16(<4 x half>, <4 x half>)
+declare <4 x float> @llvm.spv.step.v4f32(<4 x float>, <4 x float>)


Please add a newline at the end of the file

farzonl · 2024-09-11T22:46:22Z

clang/lib/Sema/SemaHLSL.cpp

+  case Builtin::BI__builtin_hlsl_step: {
+    if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))
+      return true;
+    if (SemaRef.checkArgCount(TheCall, 2))


NIT: we usually check the arg count first.

farzonl · 2024-09-11T22:46:59Z

clang/test/SemaHLSL/BuiltIns/step-errors.hlsl

+{
+  return __builtin_hlsl_step(p1, p1);
+  // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
+}


add a new line

github-actions · 2024-09-11T23:26:14Z

✅ With the latest revision this PR passed the C/C++ code formatter.

…end (#109180) This PR adds the step intrinsic and an HLSL function that uses it. The SPIRV backend is also implemented. Used #106471 as a reference. Fixes #99095

…end (llvm#109180) This PR adds the step intrinsic and an HLSL function that uses it. The SPIRV backend is also implemented. Used llvm#106471 as a reference. Fixes llvm#99095

add step

7dfcf80

damyanp approved these changes Aug 30, 2024

View reviewed changes

python3kgae approved these changes Sep 4, 2024

View reviewed changes

bogner reviewed Sep 6, 2024

View reviewed changes

farzonl reviewed Sep 11, 2024

View reviewed changes

farzonl approved these changes Sep 11, 2024

View reviewed changes

Merge branch 'main' into implement_hlsl_step_function

7924bab

bob80905 added 2 commits September 11, 2024 17:14

address farzon / justin, formatting

af4a0f6

more clang format

13878a4

bob80905 merged commit 2d47a0b into llvm:main Sep 12, 2024
9 checks passed

bob80905 mentioned this pull request Sep 18, 2024

Add cross builtins and cross HLSL function to DirectX and SPIR-V backend #109180

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add step builtins and step HLSL function to DirectX and SPIR-V backend #106471

Add step builtins and step HLSL function to DirectX and SPIR-V backend #106471

bob80905 commented Aug 28, 2024

llvmbot commented Aug 28, 2024 •

edited

Loading

bogner Sep 6, 2024

bogner Sep 6, 2024

bogner Sep 6, 2024

bogner Sep 6, 2024

farzonl Sep 11, 2024

farzonl Sep 11, 2024

github-actions bot commented Sep 11, 2024 •

edited

Loading

Add step builtins and step HLSL function to DirectX and SPIR-V backend #106471

Add step builtins and step HLSL function to DirectX and SPIR-V backend #106471

Conversation

bob80905 commented Aug 28, 2024

llvmbot commented Aug 28, 2024 • edited Loading

bogner Sep 6, 2024

Choose a reason for hiding this comment

bogner Sep 6, 2024

Choose a reason for hiding this comment

bogner Sep 6, 2024

Choose a reason for hiding this comment

bogner Sep 6, 2024

Choose a reason for hiding this comment

farzonl Sep 11, 2024

Choose a reason for hiding this comment

farzonl Sep 11, 2024

Choose a reason for hiding this comment

github-actions bot commented Sep 11, 2024 • edited Loading

llvmbot commented Aug 28, 2024 •

edited

Loading

github-actions bot commented Sep 11, 2024 •

edited

Loading