Skip to content

Commit

Permalink
[DirectX] Lower @llvm.dx.typedBufferStore to DXIL ops
Browse files Browse the repository at this point in the history
The `@llvm.dx.typedBufferStore` intrinsic is lowered to `@dx.op.bufferStore`.

Pull Request: llvm#104253
  • Loading branch information
bogner committed Aug 14, 2024
1 parent d78ffd2 commit 151e561
Show file tree
Hide file tree
Showing 5 changed files with 150 additions and 5 deletions.
6 changes: 3 additions & 3 deletions llvm/docs/DirectX/DXILResources.rst
Original file line number Diff line number Diff line change
Expand Up @@ -365,11 +365,11 @@ Examples:

.. code-block:: llvm
call void @llvm.dx.bufferStore.tdx.Buffer_f32_1_0t(
call void @llvm.dx.typedBufferStore.tdx.Buffer_v4f32_1_0_0t(
target("dx.TypedBuffer", f32, 1, 0) %buf, i32 %index, <4 x f32> %data)
call void @llvm.dx.bufferStore.tdx.Buffer_f16_1_0t(
call void @llvm.dx.typedBufferStore.tdx.Buffer_v4f16_1_0_0t(
target("dx.TypedBuffer", f16, 1, 0) %buf, i32 %index, <4 x f16> %data)
call void @llvm.dx.bufferStore.tdx.Buffer_f64_1_0t(
call void @llvm.dx.typedBufferStore.tdx.Buffer_v2f64_1_0_0t(
target("dx.TypedBuffer", f64, 1, 0) %buf, i32 %index, <2 x f64> %data)
.. list-table:: ``@llvm.dx.rawBufferPtr``
Expand Down
5 changes: 3 additions & 2 deletions llvm/include/llvm/IR/IntrinsicsDirectX.td
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@ def int_dx_handle_fromBinding
[IntrNoMem]>;

def int_dx_typedBufferLoad
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_any_ty, llvm_i32_ty]>;
: DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_any_ty, llvm_i32_ty]>;
def int_dx_typedBufferStore
: DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty]>;

// Cast between target extension handle types and dxil-style opaque handles
def int_dx_cast_handle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/DirectX/DXIL.td
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,18 @@ def BufferLoad : DXILOp<68, bufferLoad> {
let stages = [Stages<DXIL1_0, [all_stages]>];
}

def BufferStore : DXILOp<69, bufferStore> {
let Doc = "writes to an RWTypedBuffer";
// Handle, Coord0, Coord1, Val0, Val1, Val2, Val3, Mask
let arguments = [
HandleTy, Int32Ty, Int32Ty, OverloadTy, OverloadTy, OverloadTy, OverloadTy,
Int8Ty
];
let result = VoidTy;
let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy, Int16Ty, Int32Ty]>];
let stages = [Stages<DXIL1_0, [all_stages]>];
}

def ThreadId : DXILOp<93, threadId> {
let Doc = "Reads the thread ID";
let LLVMIntrinsic = int_dx_thread_id;
Expand Down
40 changes: 40 additions & 0 deletions llvm/lib/Target/DirectX/DXILOpLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,43 @@ class OpLowerer {
});
}

void lowerTypedBufferStore(Function &F) {
IRBuilder<> &IRB = OpBuilder.getIRB();
Type *Int8Ty = IRB.getInt8Ty();
Type *Int32Ty = IRB.getInt32Ty();

replaceFunction(F, [&](CallInst *CI) -> Error {
IRB.SetInsertPoint(CI);

Value *Handle =
createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
Value *Index0 = CI->getArgOperand(1);
Value *Index1 = UndefValue::get(Int32Ty);
// For typed stores, the mask must always cover all four elements.
Constant *Mask = ConstantInt::get(Int8Ty, 0xF);

Value *Data = CI->getArgOperand(2);
Value *Data0 =
IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, 0));
Value *Data1 =
IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, 1));
Value *Data2 =
IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, 2));
Value *Data3 =
IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, 3));

std::array<Value *, 8> Args{Handle, Index0, Index1, Data0,
Data1, Data2, Data3, Mask};
Expected<CallInst *> OpCall =
OpBuilder.tryCreateOp(OpCode::BufferStore, Args);
if (Error E = OpCall.takeError())
return E;

CI->eraseFromParent();
return Error::success();
});
}

bool lowerIntrinsics() {
bool Updated = false;

Expand All @@ -310,6 +347,9 @@ class OpLowerer {
case Intrinsic::dx_typedBufferLoad:
lowerTypedBufferLoad(F);
break;
case Intrinsic::dx_typedBufferStore:
lowerTypedBufferStore(F);
break;
}
Updated = true;
}
Expand Down
92 changes: 92 additions & 0 deletions llvm/test/CodeGen/DirectX/BufferStore.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
; RUN: opt -S -dxil-op-lower %s | FileCheck %s

target triple = "dxil-pc-shadermodel6.6-compute"

define void @storefloat(<4 x float> %data, i32 %index) {

; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]]
%buffer = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
@llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0(
i32 0, i32 0, i32 1, i32 0, i1 false)

; The temporary casts should all have been cleaned up
; CHECK-NOT: %dx.cast_handle

; CHECK: [[DATA0_0:%.*]] = extractelement <4 x float> %data, i32 0
; CHECK: [[DATA0_1:%.*]] = extractelement <4 x float> %data, i32 1
; CHECK: [[DATA0_2:%.*]] = extractelement <4 x float> %data, i32 2
; CHECK: [[DATA0_3:%.*]] = extractelement <4 x float> %data, i32 3
; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, float [[DATA0_0]], float [[DATA0_1]], float [[DATA0_2]], float [[DATA0_3]], i8 15)
call void @llvm.dx.typedBufferStore(
target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer,
i32 %index, <4 x float> %data)

ret void
}

define void @storeint(<4 x i32> %data, i32 %index) {

; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]]
%buffer = call target("dx.TypedBuffer", <4 x i32>, 1, 0, 0)
@llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4i32_1_0_0(
i32 0, i32 0, i32 1, i32 0, i1 false)

; CHECK: [[DATA0_0:%.*]] = extractelement <4 x i32> %data, i32 0
; CHECK: [[DATA0_1:%.*]] = extractelement <4 x i32> %data, i32 1
; CHECK: [[DATA0_2:%.*]] = extractelement <4 x i32> %data, i32 2
; CHECK: [[DATA0_3:%.*]] = extractelement <4 x i32> %data, i32 3
; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, i32 [[DATA0_0]], i32 [[DATA0_1]], i32 [[DATA0_2]], i32 [[DATA0_3]], i8 15)
call void @llvm.dx.typedBufferStore(
target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) %buffer,
i32 %index, <4 x i32> %data)

ret void
}

define void @storehalf(<4 x half> %data, i32 %index) {

; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]]
%buffer = call target("dx.TypedBuffer", <4 x half>, 1, 0, 0)
@llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f16_1_0_0(
i32 0, i32 0, i32 1, i32 0, i1 false)

; The temporary casts should all have been cleaned up
; CHECK-NOT: %dx.cast_handle

; CHECK: [[DATA0_0:%.*]] = extractelement <4 x half> %data, i32 0
; CHECK: [[DATA0_1:%.*]] = extractelement <4 x half> %data, i32 1
; CHECK: [[DATA0_2:%.*]] = extractelement <4 x half> %data, i32 2
; CHECK: [[DATA0_3:%.*]] = extractelement <4 x half> %data, i32 3
; CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, half [[DATA0_0]], half [[DATA0_1]], half [[DATA0_2]], half [[DATA0_3]], i8 15)
call void @llvm.dx.typedBufferStore(
target("dx.TypedBuffer", <4 x half>, 1, 0, 0) %buffer,
i32 %index, <4 x half> %data)

ret void
}

define void @storei16(<4 x i16> %data, i32 %index) {

; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]]
%buffer = call target("dx.TypedBuffer", <4 x i16>, 1, 0, 0)
@llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4i16_1_0_0(
i32 0, i32 0, i32 1, i32 0, i1 false)

; The temporary casts should all have been cleaned up
; CHECK-NOT: %dx.cast_handle

; CHECK: [[DATA0_0:%.*]] = extractelement <4 x i16> %data, i32 0
; CHECK: [[DATA0_1:%.*]] = extractelement <4 x i16> %data, i32 1
; CHECK: [[DATA0_2:%.*]] = extractelement <4 x i16> %data, i32 2
; CHECK: [[DATA0_3:%.*]] = extractelement <4 x i16> %data, i32 3
; CHECK: call void @dx.op.bufferStore.i16(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, i16 [[DATA0_0]], i16 [[DATA0_1]], i16 [[DATA0_2]], i16 [[DATA0_3]], i8 15)
call void @llvm.dx.typedBufferStore(
target("dx.TypedBuffer", <4 x i16>, 1, 0, 0) %buffer,
i32 %index, <4 x i16> %data)

ret void
}

0 comments on commit 151e561

Please sign in to comment.