[NFC][AMDGPU] Pre-commit tests for buffer contents legalization #110559
Conversation
@llvm/pr-subscribers-backend-amdgpu

Author: Krzysztof Drewniak (krzysz00)

Changes

Currently, many attempts to lower loads and stores on buffer fat pointers lower directly to intrinsic calls that will be unsupported by or crash codegen (e.g., storing a [2 x i32], a <6 x half>, or an i160). Record the current behavior to make the effects of the fix more visible in an upcoming PR.

Patch is 73.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/110559.diff

1 File Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-contents-legalization.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-contents-legalization.ll
new file mode 100644
index 00000000000000..ff17d8866aeb19
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-contents-legalization.ll
@@ -0,0 +1,1685 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mcpu=gfx900 -amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s
+; RUN: opt -S -mcpu=gfx900 -passes=amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+target triple = "amdgcn--"
+
+;;; Legal types. These are natively supported, no casts should be performed.
+
+define i8 @load_i8(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define i8 @load_i8(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) align 1 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret i8 [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load i8, ptr addrspace(7) %p
+ ret i8 %ret
+}
+
+define void @store_i8(i8 %data, ptr addrspace(8) %buf) {
+; CHECK-LABEL: define void @store_i8(
+; CHECK-SAME: i8 [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 [[DATA]], ptr addrspace(8) align 1 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store i8 %data, ptr addrspace(7) %p
+ ret void
+}
+
+define i16 @load_i16(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define i16 @load_i16(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret i16 [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load i16, ptr addrspace(7) %p
+ ret i16 %ret
+}
+
+define void @store_i16(i16 %data, ptr addrspace(8) %buf) {
+; CHECK-LABEL: define void @store_i16(
+; CHECK-SAME: i16 [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 [[DATA]], ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store i16 %data, ptr addrspace(7) %p
+ ret void
+}
+
+define i32 @load_i32(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define i32 @load_i32(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load i32, ptr addrspace(7) %p
+ ret i32 %ret
+}
+
+define void @store_i32(i32 %data, ptr addrspace(8) %buf) {
+; CHECK-LABEL: define void @store_i32(
+; CHECK-SAME: i32 [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[DATA]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store i32 %data, ptr addrspace(7) %p
+ ret void
+}
+
+define i64 @load_i64(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define i64 @load_i64(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = call i64 @llvm.amdgcn.raw.ptr.buffer.load.i64(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret i64 [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load i64, ptr addrspace(7) %p
+ ret i64 %ret
+}
+
+define void @store_i64(i64 %data, ptr addrspace(8) %buf) {
+; CHECK-LABEL: define void @store_i64(
+; CHECK-SAME: i64 [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i64(i64 [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store i64 %data, ptr addrspace(7) %p
+ ret void
+}
+
+define i128 @load_i128(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define i128 @load_i128(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = call i128 @llvm.amdgcn.raw.ptr.buffer.load.i128(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret i128 [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load i128, ptr addrspace(7) %p
+ ret i128 %ret
+}
+
+define void @store_i128(i128 %data, ptr addrspace(8) %buf) {
+; CHECK-LABEL: define void @store_i128(
+; CHECK-SAME: i128 [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i128(i128 [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store i128 %data, ptr addrspace(7) %p
+ ret void
+}
+
+define <1 x i32> @load_v1i32(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define <1 x i32> @load_v1i32(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = call <1 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v1i32(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret <1 x i32> [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load <1 x i32>, ptr addrspace(7) %p
+ ret <1 x i32> %ret
+}
+
+define void @store_v1i32(<1 x i32> %data, ptr addrspace(8) %buf) {
+; CHECK-LABEL: define void @store_v1i32(
+; CHECK-SAME: <1 x i32> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v1i32(<1 x i32> [[DATA]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store <1 x i32> %data, ptr addrspace(7) %p
+ ret void
+}
+
+define <2 x i32> @load_v2i32(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define <2 x i32> @load_v2i32(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load <2 x i32>, ptr addrspace(7) %p
+ ret <2 x i32> %ret
+}
+
+define void @store_v2i32(<2 x i32> %data, ptr addrspace(8) %buf) {
+; CHECK-LABEL: define void @store_v2i32(
+; CHECK-SAME: <2 x i32> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store <2 x i32> %data, ptr addrspace(7) %p
+ ret void
+}
+
+define <3 x i32> @load_v3i32(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define <3 x i32> @load_v3i32(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load <3 x i32>, ptr addrspace(7) %p
+ ret <3 x i32> %ret
+}
+
+define void @store_v3i32(<3 x i32> %data, ptr addrspace(8) %buf) {
+; CHECK-LABEL: define void @store_v3i32(
+; CHECK-SAME: <3 x i32> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store <3 x i32> %data, ptr addrspace(7) %p
+ ret void
+}
+
+define <4 x i32> @load_v4i32(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define <4 x i32> @load_v4i32(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret <4 x i32> [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load <4 x i32>, ptr addrspace(7) %p
+ ret <4 x i32> %ret
+}
+
+define void @store_v4i32(<4 x i32> %data, ptr addrspace(8) %buf) {
+; CHECK-LABEL: define void @store_v4i32(
+; CHECK-SAME: <4 x i32> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store <4 x i32> %data, ptr addrspace(7) %p
+ ret void
+}
+
+define <2 x i16> @load_v2i16(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define <2 x i16> @load_v2i16(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = call <2 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v2i16(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret <2 x i16> [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load <2 x i16>, ptr addrspace(7) %p
+ ret <2 x i16> %ret
+}
+
+define void @store_v2i16(<2 x i16> %data, ptr addrspace(8) %buf) {
+; CHECK-LABEL: define void @store_v2i16(
+; CHECK-SAME: <2 x i16> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i16(<2 x i16> [[DATA]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store <2 x i16> %data, ptr addrspace(7) %p
+ ret void
+}
+
+define <4 x i16> @load_v4i16(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define <4 x i16> @load_v4i16(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = call <4 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v4i16(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret <4 x i16> [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load <4 x i16>, ptr addrspace(7) %p
+ ret <4 x i16> %ret
+}
+
+define void @store_v4i16(<4 x i16> %data, ptr addrspace(8) %buf) {
+; CHECK-LABEL: define void @store_v4i16(
+; CHECK-SAME: <4 x i16> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i16(<4 x i16> [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store <4 x i16> %data, ptr addrspace(7) %p
+ ret void
+}
+
+define <8 x i16> @load_v8i16(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define <8 x i16> @load_v8i16(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = call <8 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v8i16(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret <8 x i16> [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load <8 x i16>, ptr addrspace(7) %p
+ ret <8 x i16> %ret
+}
+
+define void @store_v8i16(<8 x i16> %data, ptr addrspace(8) %buf) {
+; CHECK-LABEL: define void @store_v8i16(
+; CHECK-SAME: <8 x i16> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v8i16(<8 x i16> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store <8 x i16> %data, ptr addrspace(7) %p
+ ret void
+}
+
+define <2 x i64> @load_v2i64(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define <2 x i64> @load_v2i64(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.amdgcn.raw.ptr.buffer.load.v2i64(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret <2 x i64> [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load <2 x i64>, ptr addrspace(7) %p
+ ret <2 x i64> %ret
+}
+
+define void @store_v2i64(<2 x i64> %data, ptr addrspace(8) %buf) {
+; CHECK-LABEL: define void @store_v2i64(
+; CHECK-SAME: <2 x i64> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i64(<2 x i64> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store <2 x i64> %data, ptr addrspace(7) %p
+ ret void
+}
+
+define half @load_f16(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define half @load_f16(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret half [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load half, ptr addrspace(7) %p
+ ret half %ret
+}
+
+define void @store_f16(half %data, ptr addrspace(8) %buf) {
+; CHECK-LABEL: define void @store_f16(
+; CHECK-SAME: half [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f16(half [[DATA]], ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store half %data, ptr addrspace(7) %p
+ ret void
+}
+
+define bfloat @load_bf16(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define bfloat @load_bf16(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = call bfloat @llvm.amdgcn.raw.ptr.buffer.load.bf16(ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret bfloat [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load bfloat, ptr addrspace(7) %p
+ ret bfloat %ret
+}
+
+define void @store_bf16(bfloat %data, ptr addrspace(8) %buf) {
+; CHECK-LABEL: define void @store_bf16(
+; CHECK-SAME: bfloat [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.bf16(bfloat [[DATA]], ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store bfloat %data, ptr addrspace(7) %p
+ ret void
+}
+
+define <2 x half> @load_v2f16(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define <2 x half> @load_v2f16(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret <2 x half> [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load <2 x half>, ptr addrspace(7) %p
+ ret <2 x half> %ret
+}
+
+define void @store_v2f16(<2 x half> %data, ptr addrspace(8) %buf) {
+; CHECK-LABEL: define void @store_v2f16(
+; CHECK-SAME: <2 x half> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half> [[DATA]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store <2 x half> %data, ptr addrspace(7) %p
+ ret void
+}
+
+define <4 x bfloat> @load_v4bf16(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define <4 x bfloat> @load_v4bf16(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = call <4 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v4bf16(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret <4 x bfloat> [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load <4 x bfloat>, ptr addrspace(7) %p
+ ret <4 x bfloat> %ret
+}
+
+define void @store_v4bf16(<4 x bfloat> %data, ptr addrspace(8) %buf) {
+; CHECK-LABEL: define void @store_v4bf16(
+; CHECK-SAME: <4 x bfloat> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4bf16(<4 x bfloat> [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store <4 x bfloat> %data, ptr addrspace(7) %p
+ ret void
+}
+
+define <8 x half> @load_v8f16(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define <8 x half> @load_v8f16(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = call <8 x half> @llvm.amdgcn.raw.ptr.buffer.load.v8f16(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret <8 x half> [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load <8 x half>, ptr addrspace(7) %p
+ ret <8 x half> %ret
+}
+
+define void @store_v8f16(<8 x half> %data, ptr addrspace(8) %buf) {
+; CHECK-LABEL: define void @store_v8f16(
+; CHECK-SAME: <8 x half> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v8f16(<8 x half> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store <8 x half> %data, ptr addrspace(7) %p
+ ret void
+}
+
+define float @load_f32(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define float @load_f32(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret float [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load float, ptr addrspace(7) %p
+ ret float %ret
+}
+
+define void @store_f32(float %data, ptr addrspace(8) %buf) {
+; CHECK-LABEL: define void @store_f32(
+; CHECK-SAME: float [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[DATA]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store float %data, ptr addrspace(7) %p
+ ret void
+}
+
+define <2 x float> @load_v2f32(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define <2 x float> @load_v2f32(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret <2 x float> [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load <2 x float>, ptr addrspace(7) %p
+ ret <2 x float> %ret
+}
+
+define void @store_v2f32(<2 x float> %data, ptr addrspace(8) %buf) {
+; CHECK-LABEL: define void @store_v2f32(
+; CHECK-SAME: <2 x float> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store <2 x float> %data, ptr addrspace(7) %p
+ ret void
+}
+
+define <3 x float> @load_v3f32(ptr addrspace(8) %buf) {
+; CHECK-LABEL: define <3 x float> @load_v3f32(
+; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: ret <3 x float> [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load <3 x float>, ptr addrspace(7) %p
+ ret <3 x float> %ret
+}
+
+define void...
[truncated]
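The truncated remainder presumably contains the illegal-type cases the description calls out. As a hypothetical reduced example in the style of the tests above (not taken verbatim from the patch), a store of a <6 x half> through a buffer fat pointer currently becomes a single intrinsic call on a type codegen cannot select:

define void @store_v6f16(<6 x half> %data, ptr addrspace(8) %buf) {
  ; Following the intrinsic-mangling pattern of the checks above, the pass
  ; would currently rewrite this store into a direct
  ; @llvm.amdgcn.raw.ptr.buffer.store.v6f16 call, which codegen does not
  ; support; the upcoming fix is meant to legalize such contents first.
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <6 x half> %data, ptr addrspace(7) %p
  ret void
}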
; RUN: opt -S -mcpu=gfx900 -amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s
; RUN: opt -S -mcpu=gfx900 -passes=amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
These two are not needed if using -mtriple=amdgcn
All the existing buffer fat pointer tests explicitly spell out the data layout so I figured I'd follow convention
It's rare to have datalayout / triple in the test
Should I fix that up here or in a separate bit of NFC?
(And this is testing a pass that needs the layout set correctly)
The default layout should be the correct one as long as the triple is set correctly. The explicit data layout specification is only required if one wants to override the default one (for whatever reason).
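Concretely, the suggestion amounts to dropping both target lines and putting the triple on the RUN lines instead; a sketch of what that would look like (flag placement assumed, not text from the thread):

; RUN: opt -S -mtriple=amdgcn -mcpu=gfx900 -amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn -mcpu=gfx900 -passes=amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s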
@@ -0,0 +1,1685 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mcpu=gfx900 -amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s
Don't bother with the old PM run line
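Combined with the triple suggestion above, the header would reduce to a single new-pass-manager RUN line, roughly:

; RUN: opt -S -mtriple=amdgcn -mcpu=gfx900 -passes=amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s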
Currently, many attempts to lower loads and stores on buffer fat pointers lower directly to intrinsic calls that will be unsupported by or crash codegen (e.g., storing a [2 x i32], a <6 x half>, or an i160). Record the current behavior to make the effects of the fix more visible in an upcoming PR.

Force-pushed from e8bb78b to 48396a4