-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DirectX] Lower @llvm.dx.typedBufferStore
to DXIL ops
#104253
[DirectX] Lower @llvm.dx.typedBufferStore
to DXIL ops
#104253
Conversation
Created using spr 1.3.5-bogner [skip ci]
Created using spr 1.3.5-bogner
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-backend-directx Author: Justin Bogner (bogner) ChangesThe Full diff: https://github.com/llvm/llvm-project/pull/104253.diff 5 Files Affected:
diff --git a/llvm/docs/DirectX/DXILResources.rst b/llvm/docs/DirectX/DXILResources.rst
index aef88bc43b224d..cd6c527a546610 100644
--- a/llvm/docs/DirectX/DXILResources.rst
+++ b/llvm/docs/DirectX/DXILResources.rst
@@ -365,11 +365,11 @@ Examples:
.. code-block:: llvm
- call void @llvm.dx.bufferStore.tdx.Buffer_f32_1_0t(
+ call void @llvm.dx.typedBufferStore.tdx.Buffer_v4f32_1_0_0t(
target("dx.TypedBuffer", f32, 1, 0) %buf, i32 %index, <4 x f32> %data)
- call void @llvm.dx.bufferStore.tdx.Buffer_f16_1_0t(
+ call void @llvm.dx.typedBufferStore.tdx.Buffer_v4f16_1_0_0t(
target("dx.TypedBuffer", f16, 1, 0) %buf, i32 %index, <4 x f16> %data)
- call void @llvm.dx.bufferStore.tdx.Buffer_f64_1_0t(
+ call void @llvm.dx.typedBufferStore.tdx.Buffer_v2f64_1_0_0t(
target("dx.TypedBuffer", f64, 1, 0) %buf, i32 %index, <2 x f64> %data)
.. list-table:: ``@llvm.dx.rawBufferPtr``
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index d817b610fa71a0..67351ad8f9b91f 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -31,8 +31,9 @@ def int_dx_handle_fromBinding
[IntrNoMem]>;
def int_dx_typedBufferLoad
- : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [llvm_any_ty, llvm_i32_ty]>;
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_any_ty, llvm_i32_ty]>;
+def int_dx_typedBufferStore
+ : DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty]>;
// Cast between target extension handle types and dxil-style opaque handles
def int_dx_cast_handle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index b114148f84e843..50eff20e810d79 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -697,6 +697,18 @@ def BufferLoad : DXILOp<68, bufferLoad> {
let stages = [Stages<DXIL1_0, [all_stages]>];
}
+def BufferStore : DXILOp<69, bufferStore> {
+ let Doc = "writes to an RWTypedBuffer";
+ // Handle, Coord0, Coord1, Val0, Val1, Val2, Val3, Mask
+ let arguments = [
+ HandleTy, Int32Ty, Int32Ty, OverloadTy, OverloadTy, OverloadTy, OverloadTy,
+ Int8Ty
+ ];
+ let result = VoidTy;
+ let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy, Int16Ty, Int32Ty]>];
+ let stages = [Stages<DXIL1_0, [all_stages]>];
+}
+
def ThreadId : DXILOp<93, threadId> {
let Doc = "Reads the thread ID";
let LLVMIntrinsic = int_dx_thread_id;
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 46dfc905b5875c..9d5377edd3a780 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -289,6 +289,43 @@ class OpLowerer {
});
}
+ void lowerTypedBufferStore(Function &F) {
+ IRBuilder<> &IRB = OpBuilder.getIRB();
+ Type *Int8Ty = IRB.getInt8Ty();
+ Type *Int32Ty = IRB.getInt32Ty();
+
+ replaceFunction(F, [&](CallInst *CI) -> Error {
+ IRB.SetInsertPoint(CI);
+
+ Value *Handle =
+ createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
+ Value *Index0 = CI->getArgOperand(1);
+ Value *Index1 = UndefValue::get(Int32Ty);
+ // For typed stores, the mask must always cover all four elements.
+ Constant *Mask = ConstantInt::get(Int8Ty, 0xF);
+
+ Value *Data = CI->getArgOperand(2);
+ Value *Data0 =
+ IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, 0));
+ Value *Data1 =
+ IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, 1));
+ Value *Data2 =
+ IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, 2));
+ Value *Data3 =
+ IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, 3));
+
+ std::array<Value *, 8> Args{Handle, Index0, Index1, Data0,
+ Data1, Data2, Data3, Mask};
+ Expected<CallInst *> OpCall =
+ OpBuilder.tryCreateOp(OpCode::BufferStore, Args);
+ if (Error E = OpCall.takeError())
+ return E;
+
+ CI->eraseFromParent();
+ return Error::success();
+ });
+ }
+
bool lowerIntrinsics() {
bool Updated = false;
@@ -310,6 +347,9 @@ class OpLowerer {
case Intrinsic::dx_typedBufferLoad:
lowerTypedBufferLoad(F);
break;
+ case Intrinsic::dx_typedBufferStore:
+ lowerTypedBufferStore(F);
+ break;
}
Updated = true;
}
diff --git a/llvm/test/CodeGen/DirectX/BufferStore.ll b/llvm/test/CodeGen/DirectX/BufferStore.ll
new file mode 100644
index 00000000000000..102084816a6f24
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/BufferStore.ll
@@ -0,0 +1,92 @@
+; RUN: opt -S -dxil-op-lower %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+define void @storefloat(<4 x float> %data, i32 %index) {
+
+ ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
+ ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]]
+ %buffer = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
+ @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; The temporary casts should all have been cleaned up
+ ; CHECK-NOT: %dx.cast_handle
+
+ ; CHECK: [[DATA0_0:%.*]] = extractelement <4 x float> %data, i32 0
+ ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x float> %data, i32 1
+ ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x float> %data, i32 2
+ ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x float> %data, i32 3
+ ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, float [[DATA0_0]], float [[DATA0_1]], float [[DATA0_2]], float [[DATA0_3]], i8 15)
+ call void @llvm.dx.typedBufferStore(
+ target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer,
+ i32 %index, <4 x float> %data)
+
+ ret void
+}
+
+define void @storeint(<4 x i32> %data, i32 %index) {
+
+ ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
+ ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]]
+ %buffer = call target("dx.TypedBuffer", <4 x i32>, 1, 0, 0)
+ @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4i32_1_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK: [[DATA0_0:%.*]] = extractelement <4 x i32> %data, i32 0
+ ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x i32> %data, i32 1
+ ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x i32> %data, i32 2
+ ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x i32> %data, i32 3
+ ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, i32 [[DATA0_0]], i32 [[DATA0_1]], i32 [[DATA0_2]], i32 [[DATA0_3]], i8 15)
+ call void @llvm.dx.typedBufferStore(
+ target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) %buffer,
+ i32 %index, <4 x i32> %data)
+
+ ret void
+}
+
+define void @storehalf(<4 x half> %data, i32 %index) {
+
+ ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
+ ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]]
+ %buffer = call target("dx.TypedBuffer", <4 x half>, 1, 0, 0)
+ @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f16_1_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; The temporary casts should all have been cleaned up
+ ; CHECK-NOT: %dx.cast_handle
+
+ ; CHECK: [[DATA0_0:%.*]] = extractelement <4 x half> %data, i32 0
+ ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x half> %data, i32 1
+ ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x half> %data, i32 2
+ ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x half> %data, i32 3
+ ; CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, half [[DATA0_0]], half [[DATA0_1]], half [[DATA0_2]], half [[DATA0_3]], i8 15)
+ call void @llvm.dx.typedBufferStore(
+ target("dx.TypedBuffer", <4 x half>, 1, 0, 0) %buffer,
+ i32 %index, <4 x half> %data)
+
+ ret void
+}
+
+define void @storei16(<4 x i16> %data, i32 %index) {
+
+ ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
+ ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]]
+ %buffer = call target("dx.TypedBuffer", <4 x i16>, 1, 0, 0)
+ @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4i16_1_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; The temporary casts should all have been cleaned up
+ ; CHECK-NOT: %dx.cast_handle
+
+ ; CHECK: [[DATA0_0:%.*]] = extractelement <4 x i16> %data, i32 0
+ ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x i16> %data, i32 1
+ ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x i16> %data, i32 2
+ ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x i16> %data, i32 3
+ ; CHECK: call void @dx.op.bufferStore.i16(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, i16 [[DATA0_0]], i16 [[DATA0_1]], i16 [[DATA0_2]], i16 [[DATA0_3]], i8 15)
+ call void @llvm.dx.typedBufferStore(
+ target("dx.TypedBuffer", <4 x i16>, 1, 0, 0) %buffer,
+ i32 %index, <4 x i16> %data)
+
+ ret void
+}
|
Depends on #104252 |
The `@llvm.dx.typedBufferStore` intrinsic is lowered to `@dx.op.bufferStore`. Pull Request: llvm#104253
Created using spr 1.3.5-bogner [skip ci]
Created using spr 1.3.5-bogner
Created using spr 1.3.5-bogner [skip ci]
Created using spr 1.3.5-bogner
Please take another look - specifically I'd like some review on the doc changes to make sure they're clear. |
} | ||
Updated = true; | ||
} | ||
if (Updated) | ||
if (Updated && !HasErrors) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So dx_typedBufferLoad cannot fail?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It can fail in the same ways any of the op lowering can fail (like say if we just give it completely incorrect types) but I think it happens to always fail in such a way that the cleanup wouldn't crash. The HasErrors check is added here so that we can carry on and finish the pass and just let LLVM's error handling propagate the error afterwards, because when lowerTypedBufferStore
fails it can leave the temporary casts in a bad state.
All that said, we can and should add some tests for cases where lowering loads fails. I'll do that in a follow up change.
Created using spr 1.3.5-bogner [skip ci]
Created using spr 1.3.5-bogner
Created using spr 1.3.5-bogner
The `@llvm.dx.typedBufferStore` intrinsic is lowered to `@dx.op.bufferStore`. Pull Request: llvm#104253
The
@llvm.dx.typedBufferStore
intrinsic is lowered to@dx.op.bufferStore
.