From 9fb1fe878f3efea031a2b707f3d4025eee8f104c Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 18 Jul 2024 15:33:03 -0400 Subject: [PATCH] [Clang][AMDGPU] Add builtins for intrinsic `llvm.amdgcn.raw.ptr.buffer.load` (#99258) --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 6 + clang/lib/CodeGen/CGBuiltin.cpp | 33 ++++ .../builtins-amdgcn-raw-buffer-load.cl | 172 ++++++++++++++++++ .../builtins-amdgcn-raw-buffer-load-error.cl | 33 ++++ 4 files changed, 244 insertions(+) create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-error.cl diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index e62315eea277ab5..774cbaa74f8afd7 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -155,6 +155,12 @@ BUILTIN(__builtin_amdgcn_raw_buffer_store_b32, "viQbiiIi", "n") BUILTIN(__builtin_amdgcn_raw_buffer_store_b64, "vV2iQbiiIi", "n") BUILTIN(__builtin_amdgcn_raw_buffer_store_b96, "vV3iQbiiIi", "n") BUILTIN(__builtin_amdgcn_raw_buffer_store_b128, "vV4iQbiiIi", "n") +BUILTIN(__builtin_amdgcn_raw_buffer_load_b8, "UcQbiiIi", "n") +BUILTIN(__builtin_amdgcn_raw_buffer_load_b16, "UsQbiiIi", "n") +BUILTIN(__builtin_amdgcn_raw_buffer_load_b32, "UiQbiiIi", "n") +BUILTIN(__builtin_amdgcn_raw_buffer_load_b64, "V2UiQbiiIi", "n") +BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n") +BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n") //===----------------------------------------------------------------------===// // Ballot builtins. 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2ad62d6ee0bb285..f426570b17e1523 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19185,6 +19185,39 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128: return emitBuiltinWithOneOverloadedType<5>( *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store); + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: { /* All six raw buffer load builtins share one lowering; only the result type differs. */ + llvm::Type *RetTy = nullptr; /* Result type used to select the intrinsic overload; assigned by the inner switch. */ + switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8: + RetTy = Int8Ty; + break; + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16: + RetTy = Int16Ty; + break; + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32: + RetTy = Int32Ty; + break; + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64: + RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2); /* b64 is returned as two Int32Ty elements */ + break; + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96: + RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3); /* b96 is returned as three Int32Ty elements */ + break; + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: + RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4); /* b128 is returned as four Int32Ty elements */ + break; + } + Function *F = + CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy); /* amdgcn_raw_ptr_buffer_load overloaded on RetTy */ + return Builder.CreateCall( + F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), + EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))}); /* The builtin's four arguments are forwarded unchanged, in source order. */ + } default: return nullptr; } diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl new file mode 100644 index 000000000000000..3403b69e07e4beb --- /dev/null +++ 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl @@ -0,0 +1,172 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm -o - %s | FileCheck %s + +typedef unsigned char u8; /* result type of the b8 load */ +typedef unsigned short u16; /* result type of the b16 load */ +typedef unsigned int u32; /* result type of the b32 load */ +typedef unsigned int v2u32 __attribute__((ext_vector_type(2))); /* result type of the b64 load */ +typedef unsigned int v3u32 __attribute__((ext_vector_type(3))); /* result type of the b96 load */ +typedef unsigned int v4u32 __attribute__((ext_vector_type(4))); /* result type of the b128 load */ + +// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0) +// CHECK-NEXT: ret i8 [[TMP0]] +// +u8 test_amdgcn_raw_ptr_buffer_load_b8(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_buffer_load_b8(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0); /* all-constant operands; the offset/soffset parameters are unused in this group */ +} + +// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0) +// CHECK-NEXT: ret i16 [[TMP0]] +// +u16 test_amdgcn_raw_ptr_buffer_load_b16(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_buffer_load_b16(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0) +// CHECK-NEXT: ret i32 [[TMP0]] +// +u32 test_amdgcn_raw_ptr_buffer_load_b32(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_buffer_load_b32(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0) +// CHECK-NEXT: ret <2 x i32> [[TMP0]] +// +v2u32 test_amdgcn_raw_ptr_buffer_load_b64(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_buffer_load_b64(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0) +// CHECK-NEXT: ret <3 x i32> [[TMP0]] +// +v3u32 test_amdgcn_raw_ptr_buffer_load_b96(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_buffer_load_b96(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 test_amdgcn_raw_ptr_buffer_load_b128(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8_non_const_offset( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret i8 [[TMP0]] +// +u8 test_amdgcn_raw_ptr_buffer_load_b8_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_buffer_load_b8(rsrc, offset, /*soffset=*/0, /*aux=*/0); /* this group passes a runtime offset; it appears as the first i32 operand of the call */ +} + +// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16_non_const_offset( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 
[[OFFSET:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret i16 [[TMP0]] +// +u16 test_amdgcn_raw_ptr_buffer_load_b16_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_buffer_load_b16(rsrc, offset, /*soffset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32_non_const_offset( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret i32 [[TMP0]] +// +u32 test_amdgcn_raw_ptr_buffer_load_b32_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_buffer_load_b32(rsrc, offset, /*soffset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64_non_const_offset( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret <2 x i32> [[TMP0]] +// +v2u32 test_amdgcn_raw_ptr_buffer_load_b64_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_buffer_load_b64(rsrc, offset, /*soffset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96_non_const_offset( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret <3 x i32> [[TMP0]] +// +v3u32 test_amdgcn_raw_ptr_buffer_load_b96_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_buffer_load_b96(rsrc, offset, /*soffset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128_non_const_offset( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], 
i32 0, i32 0) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 test_amdgcn_raw_ptr_buffer_load_b128_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_buffer_load_b128(rsrc, offset, /*soffset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8_non_const_soffset( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0) +// CHECK-NEXT: ret i8 [[TMP0]] +// +u8 test_amdgcn_raw_ptr_buffer_load_b8_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_buffer_load_b8(rsrc, /*offset=*/0, soffset, /*aux=*/0); /* this group passes a runtime soffset; it appears as the second i32 operand of the call */ +} + +// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16_non_const_soffset( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0) +// CHECK-NEXT: ret i16 [[TMP0]] +// +u16 test_amdgcn_raw_ptr_buffer_load_b16_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_buffer_load_b16(rsrc, /*offset=*/0, soffset, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32_non_const_soffset( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0) +// CHECK-NEXT: ret i32 [[TMP0]] +// +u32 test_amdgcn_raw_ptr_buffer_load_b32_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_buffer_load_b32(rsrc, /*offset=*/0, soffset, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64_non_const_soffset( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0) +// CHECK-NEXT: ret <2 
x i32> [[TMP0]] +// +v2u32 test_amdgcn_raw_ptr_buffer_load_b64_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_buffer_load_b64(rsrc, /*offset=*/0, soffset, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96_non_const_soffset( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0) +// CHECK-NEXT: ret <3 x i32> [[TMP0]] +// +v3u32 test_amdgcn_raw_ptr_buffer_load_b96_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_buffer_load_b96(rsrc, /*offset=*/0, soffset, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128_non_const_soffset( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 test_amdgcn_raw_ptr_buffer_load_b128_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { + return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, soffset, /*aux=*/0); +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-error.cl new file mode 100644 index 000000000000000..5d123c8e81d8777 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-error.cl @@ -0,0 +1,33 @@ +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -S -verify -o - %s +// REQUIRES: amdgpu-registered-target + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned int v2u32 __attribute__((ext_vector_type(2))); +typedef unsigned int v3u32 __attribute__((ext_vector_type(3))); +typedef unsigned int v4u32 __attribute__((ext_vector_type(4))); + +u8 
test_amdgcn_raw_ptr_buffer_load_b8(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) { + return __builtin_amdgcn_raw_buffer_load_b8(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b8' must be a constant integer}} +} + +u16 test_amdgcn_raw_ptr_buffer_load_b16(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) { + return __builtin_amdgcn_raw_buffer_load_b16(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b16' must be a constant integer}} +} + +u32 test_amdgcn_raw_ptr_buffer_load_b32(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) { + return __builtin_amdgcn_raw_buffer_load_b32(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b32' must be a constant integer}} +} + +v2u32 test_amdgcn_raw_ptr_buffer_load_b64(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) { + return __builtin_amdgcn_raw_buffer_load_b64(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b64' must be a constant integer}} +} + +v3u32 test_amdgcn_raw_ptr_buffer_load_b96(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) { + return __builtin_amdgcn_raw_buffer_load_b96(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b96' must be a constant integer}} +} + +v4u32 test_amdgcn_raw_ptr_buffer_load_b128(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) { + return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b128' must be a constant integer}} +}