From 6a440058ab4abf27197c6d2a283a5e045e569650 Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Tue, 17 Sep 2024 15:06:02 -0400 Subject: [PATCH] [GPU] Use alloca for private memory allocations (#18540) Without this patch, some `memref.alloc` allocations that failed to be optimized out remained as `malloc` in the final binary. Fixes: https://github.com/iree-org/iree/issues/18534 --- .../src/iree/compiler/Codegen/LLVMGPU/Passes.cpp | 16 ++++++++++------ .../LLVMGPU/test/conv_pipeline_test_rocm.mlir | 2 +- .../Codegen/LLVMGPU/test/llvmgpu_bufferize.mlir | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp index cd89b59d642e..0f13dde34856 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp @@ -107,15 +107,19 @@ static FailureOr gpuAllocationFn(OpBuilder &builder, Location loc, if (!enclosingForall) { enclosingForall = parent->getParentOfType(); } - gpu::AddressSpaceAttr addressSpace; if (enclosingForall && hasThreadMapping(enclosingForall)) { - addressSpace = gpu::AddressSpaceAttr::get( + auto addressSpace = gpu::AddressSpaceAttr::get( builder.getContext(), gpu::GPUDialect::getPrivateAddressSpace()); - } else { - addressSpace = gpu::AddressSpaceAttr::get( - builder.getContext(), gpu::GPUDialect::getWorkgroupAddressSpace()); + auto allocType = + MemRefType::get(memRefType.getShape(), memRefType.getElementType(), + AffineMap(), addressSpace); + return builder.create(loc, allocType, dynamicSizes) + .getResult(); } - MemRefType allocType = + + auto addressSpace = gpu::AddressSpaceAttr::get( + builder.getContext(), gpu::GPUDialect::getWorkgroupAddressSpace()); + auto allocType = MemRefType::get(memRefType.getShape(), memRefType.getElementType(), AffineMap(), addressSpace); return builder.create(loc, allocType, dynamicSizes) diff --git 
a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_rocm.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_rocm.mlir index ec67064e9f82..b33502ef2676 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_rocm.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/conv_pipeline_test_rocm.mlir @@ -49,5 +49,5 @@ hal.executable private @conv_nchw_dispatch_1 { // eliminated. // CHECK-LABEL: func @conv_2d_nchw_fchw_2x320x64x64x320x3x3_f16 -// CHECK-COUNT-3: memref.alloc() : memref<1x1x1x4xf16, #gpu.address_space> +// CHECK-COUNT-3: memref.alloca() : memref<1x1x1x4xf16, #gpu.address_space> // CHECK-COUNT-3: memref.copy %{{.*}}, %{{.*}} : memref<1x1x1x4xf16, #gpu.address_space> to memref<{{.*}} #hal.descriptor_type> diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/llvmgpu_bufferize.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/llvmgpu_bufferize.mlir index 73bdb91698e9..2b29df06e71a 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/llvmgpu_bufferize.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/llvmgpu_bufferize.mlir @@ -32,7 +32,7 @@ func.func @bufferize_with_thread_private_memory(%arg0: index) { } // CHECK-LABEL: func.func @bufferize_with_thread_private_memory // CHECK: scf.forall {{.*}} in (2, 16) { -// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<1x1x4x4xf16, #gpu.address_space> +// CHECK: %[[ALLOC:.+]] = memref.alloca() : memref<1x1x4x4xf16, #gpu.address_space> // CHECK: memref.copy %{{.*}}, %[[ALLOC]] // CHECK-SAME: memref<1x1x4x4xf16, strided<[1310720, 4096, 64, 1], offset: ?>, #hal.descriptor_type> // CHECK-SAME: to memref<1x1x4x4xf16, #gpu.address_space>