-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPIR-V] Add cl_khr_kernel_clock / SPV_KHR_shader_clock extension #92771
Conversation
Recognize `cl_khr_kernel_clock` builtins and translate them to `OpReadClockKHR` instructions. The `Scope` operand is deduced from the builtin function name. spirv-val does not pass yet due to OpReadClockKHR only supporting the valid scopes for Vulkan (Device and Subgroup, but not Workgroup), so leave validation disabled with a TODO.
@llvm/pr-subscribers-backend-spir-v Author: Sven van Haastregt (svenvh) ChangesRecognize spirv-val does not pass yet due to OpReadClockKHR only supporting the valid scopes for Vulkan (Device and Subgroup, but not Workgroup), so leave validation disabled with a TODO. Provisional extension description: https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#cl_khr_kernel_clock . Full diff: https://github.com/llvm/llvm-project/pull/92771.diff 7 Files Affected:
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 9fde26c900f51..424087f361a6a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -1118,6 +1118,39 @@ static bool generateGroupUniformInst(const SPIRV::IncomingCall *Call,
return true;
}
+static bool generateKernelClockInst(const SPIRV::IncomingCall *Call,
+ MachineIRBuilder &MIRBuilder,
+ SPIRVGlobalRegistry *GR) {
+ const SPIRV::DemangledBuiltin *Builtin = Call->Builtin;
+ MachineFunction &MF = MIRBuilder.getMF();
+ const auto *ST = static_cast<const SPIRVSubtarget *>(&MF.getSubtarget());
+ if (!ST->canUseExtension(SPIRV::Extension::SPV_KHR_shader_clock)) {
+ std::string DiagMsg = std::string(Builtin->Name) +
+ ": the builtin requires the following SPIR-V "
+ "extension: SPV_KHR_shader_clock";
+ report_fatal_error(DiagMsg.c_str(), false);
+ }
+
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+ Register ResultReg = Call->ReturnRegister;
+ MRI->setRegClass(ResultReg, &SPIRV::IDRegClass);
+
+ // Deduce the `Scope` operand from the builtin function name.
+ SPIRV::Scope::Scope ScopeArg =
+ StringSwitch<SPIRV::Scope::Scope>(Builtin->Name)
+ .EndsWith("device", SPIRV::Scope::Scope::Device)
+ .EndsWith("work_group", SPIRV::Scope::Scope::Workgroup)
+ .EndsWith("sub_group", SPIRV::Scope::Scope::Subgroup);
+ Register ScopeReg = buildConstantIntReg(ScopeArg, MIRBuilder, GR);
+
+ MIRBuilder.buildInstr(SPIRV::OpReadClockKHR)
+ .addDef(ResultReg)
+ .addUse(GR->getSPIRVTypeID(Call->ReturnType))
+ .addUse(ScopeReg);
+
+ return true;
+}
+
// These queries ask for a single size_t result for a given dimension index, e.g
// size_t get_global_id(uint dimindex). In SPIR-V, the builtins corresonding to
// these values are all vec3 types, so we need to extract the correct index or
@@ -2290,6 +2323,8 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall,
return generateIntelSubgroupsInst(Call.get(), MIRBuilder, GR);
case SPIRV::GroupUniform:
return generateGroupUniformInst(Call.get(), MIRBuilder, GR);
+ case SPIRV::KernelClock:
+ return generateKernelClockInst(Call.get(), MIRBuilder, GR);
}
return false;
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index 564028547821e..692234c405ab6 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -58,6 +58,7 @@ def LoadStore : BuiltinGroup;
def IntelSubgroups : BuiltinGroup;
def AtomicFloating : BuiltinGroup;
def GroupUniform : BuiltinGroup;
+def KernelClock : BuiltinGroup;
//===----------------------------------------------------------------------===//
// Class defining a demangled builtin record. The information in the record
@@ -952,6 +953,14 @@ defm : DemangledGroupBuiltin<"group_scan_exclusive_logical_xor", OnlyWork, OpGro
defm : DemangledGroupBuiltin<"group_scan_inclusive_logical_xor", OnlyWork, OpGroupLogicalXorKHR>;
defm : DemangledGroupBuiltin<"group_reduce_logical_xor", OnlyWork, OpGroupLogicalXorKHR>;
+// cl_khr_kernel_clock / SPV_KHR_shader_clock
+defm : DemangledNativeBuiltin<"clock_read_device", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
+defm : DemangledNativeBuiltin<"clock_read_work_group", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
+defm : DemangledNativeBuiltin<"clock_read_sub_group", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
+defm : DemangledNativeBuiltin<"clock_read_hilo_device", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
+defm : DemangledNativeBuiltin<"clock_read_hilo_work_group", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
+defm : DemangledNativeBuiltin<"clock_read_hilo_sub_group", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
+
//===----------------------------------------------------------------------===//
// Class defining an atomic instruction on floating-point numbers.
//
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
index 691e6ee0e5829..752d71eddd99a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
@@ -55,6 +55,8 @@ static const std::map<std::string, SPIRV::Extension::Extension>
SPIRV::Extension::Extension::SPV_INTEL_variable_length_array},
{"SPV_INTEL_function_pointers",
SPIRV::Extension::Extension::SPV_INTEL_function_pointers},
+ {"SPV_KHR_shader_clock",
+ SPIRV::Extension::Extension::SPV_KHR_shader_clock},
};
bool SPIRVExtensionsParser::parse(cl::Option &O, llvm::StringRef ArgName,
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
index 151d0ec1fe569..a6bedab6d4ee5 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
@@ -802,6 +802,11 @@ def OpGroupNonUniformRotateKHR: Op<4431, (outs ID:$res),
(ins TYPE:$type, ID:$scope, ID:$value, ID:$delta, variable_ops),
"$res = OpGroupNonUniformRotateKHR $type $scope $value $delta">;
+// SPV_KHR_shader_clock
+def OpReadClockKHR: Op<5056, (outs ID:$res),
+ (ins TYPE:$type, ID:$scope),
+ "$res = OpReadClockKHR $type $scope">;
+
// 3.49.7, Constant-Creation Instructions
// - SPV_INTEL_function_pointers
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index 235f947901d83..cbe7c5ca30570 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -1117,6 +1117,14 @@ void addInstrRequirements(const MachineInstr &MI,
Reqs.addCapability(SPIRV::Capability::GroupUniformArithmeticKHR);
}
break;
+ case SPIRV::OpReadClockKHR:
+ if (!ST.canUseExtension(SPIRV::Extension::SPV_KHR_shader_clock))
+ report_fatal_error("OpReadClockKHR instruction requires the "
+ "following SPIR-V extension: SPV_KHR_shader_clock",
+ false);
+ Reqs.addExtension(SPIRV::Extension::SPV_KHR_shader_clock);
+ Reqs.addCapability(SPIRV::Capability::ShaderClockKHR);
+ break;
case SPIRV::OpFunctionPointerCallINTEL:
if (ST.canUseExtension(SPIRV::Extension::SPV_INTEL_function_pointers)) {
Reqs.addExtension(SPIRV::Extension::SPV_INTEL_function_pointers);
diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index 31e19ad8630cd..50d327179fa84 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -413,6 +413,7 @@ defm ImageGatherBiasLodAMD : CapabilityOperand<5009, 0, 0, [], [Shader]>;
defm FragmentMaskAMD : CapabilityOperand<5010, 0, 0, [], [Shader]>;
defm StencilExportEXT : CapabilityOperand<5013, 0, 0, [], [Shader]>;
defm ImageReadWriteLodAMD : CapabilityOperand<5015, 0, 0, [], [Shader]>;
+defm ShaderClockKHR : CapabilityOperand<5055, 0, 0, [SPV_KHR_shader_clock], []>;
defm SampleMaskOverrideCoverageNV : CapabilityOperand<5249, 0, 0, [], [SampleRateShading]>;
defm GeometryShaderPassthroughNV : CapabilityOperand<5251, 0, 0, [], [Geometry]>;
defm ShaderViewportIndexLayerEXT : CapabilityOperand<5254, 0, 0, [], [MultiViewport]>;
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_shader_clock/shader_clock.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_shader_clock/shader_clock.ll
new file mode 100644
index 0000000000000..2c984c7017f95
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_shader_clock/shader_clock.ll
@@ -0,0 +1,87 @@
+; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_KHR_shader_clock %s -o - | FileCheck %s
+; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_KHR_shader_clock %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-ERROR: LLVM ERROR: clock_read_device: the builtin requires the following SPIR-V extension: SPV_KHR_shader_clock
+
+; CHECK: OpCapability ShaderClockKHR
+; CHECK: OpExtension "SPV_KHR_shader_clock"
+; CHECK-DAG: [[uint:%[a-z0-9_]+]] = OpTypeInt 32
+; CHECK-DAG: [[ulong:%[a-z0-9_]+]] = OpTypeInt 64
+; CHECK-DAG: [[v2uint:%[a-z0-9_]+]] = OpTypeVector [[uint]] 2
+; CHECK-DAG: [[uint_1:%[a-z0-9_]+]] = OpConstant [[uint]] 1
+; CHECK-DAG: [[uint_2:%[a-z0-9_]+]] = OpConstant [[uint]] 2
+; CHECK-DAG: [[uint_3:%[a-z0-9_]+]] = OpConstant [[uint]] 3
+; CHECK: OpReadClockKHR [[ulong]] [[uint_1]]
+; CHECK: OpReadClockKHR [[ulong]] [[uint_2]]
+; CHECK: OpReadClockKHR [[ulong]] [[uint_3]]
+; CHECK: OpReadClockKHR [[v2uint]] [[uint_1]]
+; CHECK: OpReadClockKHR [[v2uint]] [[uint_2]]
+; CHECK: OpReadClockKHR [[v2uint]] [[uint_3]]
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1"
+target triple = "spir-unknown-unknown"
+
+; Function Attrs: convergent norecurse nounwind
+define dso_local spir_kernel void @test_clocks(ptr addrspace(1) nocapture noundef writeonly align 8 %out64, ptr addrspace(1) nocapture noundef writeonly align 8 %outv2) local_unnamed_addr #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
+entry:
+ %call = tail call spir_func i64 @_Z17clock_read_devicev() #2
+ store i64 %call, ptr addrspace(1) %out64, align 8, !tbaa !8
+ %call1 = tail call spir_func i64 @_Z21clock_read_work_groupv() #2
+ %arrayidx2 = getelementptr inbounds i8, ptr addrspace(1) %out64, i32 8
+ store i64 %call1, ptr addrspace(1) %arrayidx2, align 8, !tbaa !8
+ %call3 = tail call spir_func i64 @_Z20clock_read_sub_groupv() #2
+ %arrayidx4 = getelementptr inbounds i8, ptr addrspace(1) %out64, i32 16
+ store i64 %call3, ptr addrspace(1) %arrayidx4, align 8, !tbaa !8
+ %call5 = tail call spir_func <2 x i32> @_Z22clock_read_hilo_devicev() #2
+ store <2 x i32> %call5, ptr addrspace(1) %outv2, align 8, !tbaa !12
+ %call7 = tail call spir_func <2 x i32> @_Z26clock_read_hilo_work_groupv() #2
+ %arrayidx8 = getelementptr inbounds i8, ptr addrspace(1) %outv2, i32 8
+ store <2 x i32> %call7, ptr addrspace(1) %arrayidx8, align 8, !tbaa !12
+ %call9 = tail call spir_func <2 x i32> @_Z25clock_read_hilo_sub_groupv() #2
+ %arrayidx10 = getelementptr inbounds i8, ptr addrspace(1) %outv2, i32 16
+ store <2 x i32> %call9, ptr addrspace(1) %arrayidx10, align 8, !tbaa !12
+ ret void
+}
+
+; Function Attrs: convergent nounwind
+declare spir_func i64 @_Z17clock_read_devicev() local_unnamed_addr #1
+
+; Function Attrs: convergent nounwind
+declare spir_func i64 @_Z21clock_read_work_groupv() local_unnamed_addr #1
+
+; Function Attrs: convergent nounwind
+declare spir_func i64 @_Z20clock_read_sub_groupv() local_unnamed_addr #1
+
+; Function Attrs: convergent nounwind
+declare spir_func <2 x i32> @_Z22clock_read_hilo_devicev() local_unnamed_addr #1
+
+; Function Attrs: convergent nounwind
+declare spir_func <2 x i32> @_Z26clock_read_hilo_work_groupv() local_unnamed_addr #1
+
+; Function Attrs: convergent nounwind
+declare spir_func <2 x i32> @_Z25clock_read_hilo_sub_groupv() local_unnamed_addr #1
+
+attributes #0 = { convergent norecurse nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
+attributes #1 = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { convergent nounwind }
+
+!llvm.module.flags = !{!0}
+!opencl.ocl.version = !{!1}
+!opencl.spir.version = !{!1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 2, i32 0}
+!2 = !{!"clang version 19.0.0git"}
+!3 = !{i32 1, i32 1}
+!4 = !{!"none", !"none"}
+!5 = !{!"ulong*", !"uint2*"}
+!6 = !{!"ulong*", !"uint __attribute__((ext_vector_type(2)))*"}
+!7 = !{!"", !""}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"long", !10, i64 0}
+!10 = !{!"omnipotent char", !11, i64 0}
+!11 = !{!"Simple C/C++ TBAA"}
+!12 = !{!10, !10, i64 0}
|
llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_shader_clock/shader_clock.ll
Outdated
Show resolved
Hide resolved
@svenvh Thank you and very glad to see this contribution! My only suggestions are about the test case, with a general goal to make it slightly cleaner, devoid of unimportant details -- otherwise PR looks good. |
) This PR updates docs to describe support of SPV_KHR_shader_clock extension added by #92771.
Recognize
cl_khr_kernel_clock
builtins and translate them toOpReadClockKHR
instructions. TheScope
operand is deduced from the builtin function name.spirv-val does not pass yet due to OpReadClockKHR only supporting the valid scopes for Vulkan (Device and Subgroup, but not Workgroup), so leave validation disabled with a TODO.
Provisional extension description: https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#cl_khr_kernel_clock .
Builtins: https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#kernel-clock-functions .
SPIR-V environment: https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Env.html#_cl_khr_kernel_clock .