[llvm][opt][Transforms][SPIR-V] Enable `InferAddressSpaces` for SPIR-V #110897

AlexVlx · 2024-10-02T17:19:33Z

Albeit not currently enabled, the InferAddressSpaces pass is desirable / profitable for SPIR-V, as it can leverage info that might subsequently be lost as transforms are applied to the IR/resulting SPIR-V. This patch enables the pass for all SPIR-V targets, and is modelled after the AMDGPU implementation.

…ras_for_spirv

llvmbot · 2024-10-02T17:20:09Z

@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-backend-spir-v

@llvm/pr-subscribers-clang

Author: Alex Voicu (AlexVlx)

Changes

Albeit not currently enabled, the InferAddressSpaces pass is desirable / profitable for SPIR-V, as it can leverage info that might subsequently be lost as transforms are applied to the IR/resulting SPIR-V. This patch enables the pass for all SPIR-V targets, and is modelled after the AMDGPU implementation.

Patch is 93.17 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/110897.diff

20 Files Affected:

(modified) clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu (+27-35)
(modified) llvm/lib/Target/SPIRV/CMakeLists.txt (+2)
(modified) llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp (+92)
(modified) llvm/lib/Target/SPIRV/SPIRVTargetMachine.h (+7)
(modified) llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h (+4)
(added) llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll (+31)
(added) llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll (+236)
(added) llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-address-space.ll (+211)
(added) llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-addrspacecast.ll (+65)
(added) llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-getelementptr.ll (+108)
(added) llvm/test/Transforms/InferAddressSpaces/SPIRV/insert-pos-assert.ll (+158)
(added) llvm/test/Transforms/InferAddressSpaces/SPIRV/is.constant.ll (+57)
(added) llvm/test/Transforms/InferAddressSpaces/SPIRV/lit.local.cfg (+2)
(added) llvm/test/Transforms/InferAddressSpaces/SPIRV/mem-intrinsics.ll (+145)
(added) llvm/test/Transforms/InferAddressSpaces/SPIRV/multiple-uses-of-val.ll (+70)
(added) llvm/test/Transforms/InferAddressSpaces/SPIRV/prefetch.ll (+60)
(added) llvm/test/Transforms/InferAddressSpaces/SPIRV/preserving-debugloc-addrspacecast.ll (+48)
(added) llvm/test/Transforms/InferAddressSpaces/SPIRV/redundant-addrspacecast.ll (+28)
(added) llvm/test/Transforms/InferAddressSpaces/SPIRV/self-phi.ll (+29)
(added) llvm/test/Transforms/InferAddressSpaces/SPIRV/volatile.ll (+187)

diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
index b295bbbdaaf955..15c8b46d278ea1 100644
--- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -58,13 +58,11 @@
 // OPT-NEXT:    ret void
 //
 // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi(
-// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] {
+// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] {
 // OPT-SPIRV-NEXT:  [[ENTRY:.*:]]
-// OPT-SPIRV-NEXT:    [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
-// OPT-SPIRV-NEXT:    [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
-// OPT-SPIRV-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
-// OPT-SPIRV-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP2]], 1
-// OPT-SPIRV-NEXT:    store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
+// OPT-SPIRV-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
+// OPT-SPIRV-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
+// OPT-SPIRV-NEXT:    store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
 // OPT-SPIRV-NEXT:    ret void
 //
 // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel1Pi(
@@ -126,13 +124,11 @@ __global__ void kernel1(int *x) {
 // OPT-NEXT:    ret void
 //
 // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri(
-// OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
 // OPT-SPIRV-NEXT:  [[ENTRY:.*:]]
-// OPT-SPIRV-NEXT:    [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
-// OPT-SPIRV-NEXT:    [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
-// OPT-SPIRV-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
-// OPT-SPIRV-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP2]], 1
-// OPT-SPIRV-NEXT:    store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
+// OPT-SPIRV-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
+// OPT-SPIRV-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
+// OPT-SPIRV-NEXT:    store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
 // OPT-SPIRV-NEXT:    ret void
 //
 // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel2Ri(
@@ -195,7 +191,7 @@ __global__ void kernel2(int &x) {
 // OPT-NEXT:    ret void
 //
 // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
-// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] {
+// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
 // OPT-SPIRV-NEXT:  [[ENTRY:.*:]]
 // OPT-SPIRV-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4
 // OPT-SPIRV-NEXT:    store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4
@@ -261,7 +257,7 @@ __global__ void kernel3(__attribute__((address_space(2))) int *x,
 // OPT-NEXT:    ret void
 //
 // OPT-SPIRV-LABEL: define spir_func void @_Z4funcPi(
-// OPT-SPIRV-SAME: ptr addrspace(4) nocapture noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] {
+// OPT-SPIRV-SAME: ptr addrspace(4) nocapture noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] {
 // OPT-SPIRV-NEXT:  [[ENTRY:.*:]]
 // OPT-SPIRV-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(4) [[X]], align 4
 // OPT-SPIRV-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
@@ -343,7 +339,7 @@ struct S {
 // OPT-NEXT:    ret void
 //
 // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S(
-// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] {
 // OPT-SPIRV-NEXT:  [[ENTRY:.*:]]
 // OPT-SPIRV-NEXT:    [[TMP0:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0
 // OPT-SPIRV-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1
@@ -446,19 +442,17 @@ __global__ void kernel4(struct S s) {
 // OPT-NEXT:    ret void
 //
 // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S(
-// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef readonly [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] {
 // OPT-SPIRV-NEXT:  [[ENTRY:.*:]]
-// OPT-SPIRV-NEXT:    [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[S_COERCE]] to i64
-// OPT-SPIRV-NEXT:    [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
-// OPT-SPIRV-NEXT:    [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[TMP1]], align 8
-// OPT-SPIRV-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4
-// OPT-SPIRV-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP3]], 1
-// OPT-SPIRV-NEXT:    store i32 [[INC]], ptr addrspace(4) [[TMP2]], align 4
-// OPT-SPIRV-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 8
-// OPT-SPIRV-NEXT:    [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8
-// OPT-SPIRV-NEXT:    [[TMP5:%.*]] = load float, ptr addrspace(4) [[TMP4]], align 4
-// OPT-SPIRV-NEXT:    [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00
-// OPT-SPIRV-NEXT:    store float [[ADD]], ptr addrspace(4) [[TMP4]], align 4
+// OPT-SPIRV-NEXT:    [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(1) [[S_COERCE]], align 8
+// OPT-SPIRV-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4
+// OPT-SPIRV-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP1]], 1
+// OPT-SPIRV-NEXT:    store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4
+// OPT-SPIRV-NEXT:    [[Y:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[S_COERCE]], i64 8
+// OPT-SPIRV-NEXT:    [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(1) [[Y]], align 8
+// OPT-SPIRV-NEXT:    [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP2]], align 4
+// OPT-SPIRV-NEXT:    [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00
+// OPT-SPIRV-NEXT:    store float [[ADD]], ptr addrspace(4) [[TMP2]], align 4
 // OPT-SPIRV-NEXT:    ret void
 //
 // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel5P1S(
@@ -551,7 +545,7 @@ struct T {
 // OPT-NEXT:    ret void
 //
 // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T(
-// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] {
 // OPT-SPIRV-NEXT:  [[ENTRY:.*:]]
 // OPT-SPIRV-NEXT:    [[TMP0:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0
 // OPT-SPIRV-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 0
@@ -631,13 +625,11 @@ __global__ void kernel6(struct T t) {
 // OPT-NEXT:    ret void
 //
 // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi(
-// OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-SAME: ptr addrspace(1) noalias nocapture noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
 // OPT-SPIRV-NEXT:  [[ENTRY:.*:]]
-// OPT-SPIRV-NEXT:    [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
-// OPT-SPIRV-NEXT:    [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
-// OPT-SPIRV-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
-// OPT-SPIRV-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP2]], 1
-// OPT-SPIRV-NEXT:    store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
+// OPT-SPIRV-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
+// OPT-SPIRV-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
+// OPT-SPIRV-NEXT:    store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
 // OPT-SPIRV-NEXT:    ret void
 //
 // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel7Pi(
@@ -700,7 +692,7 @@ struct SS {
 // OPT-NEXT:    ret void
 //
 // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS(
-// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] {
 // OPT-SPIRV-NEXT:  [[ENTRY:.*:]]
 // OPT-SPIRV-NEXT:    [[TMP0:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0
 // OPT-SPIRV-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4
diff --git a/llvm/lib/Target/SPIRV/CMakeLists.txt b/llvm/lib/Target/SPIRV/CMakeLists.txt
index 326343ae278148..0ae292498e4636 100644
--- a/llvm/lib/Target/SPIRV/CMakeLists.txt
+++ b/llvm/lib/Target/SPIRV/CMakeLists.txt
@@ -52,6 +52,8 @@ add_llvm_target(SPIRVCodeGen
   Core
   Demangle
   GlobalISel
+  Passes
+  Scalar
   SPIRVAnalysis
   MC
   SPIRVDesc
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
index e5384b2eb2c2c1..91bcd68813fc55 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
@@ -26,9 +26,15 @@
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Pass.h"
+#include "llvm/Passes/OptimizationLevel.h"
+#include "llvm/Passes/PassBuilder.h"
 #include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
 #include "llvm/Transforms/Utils.h"
 #include <optional>
 
@@ -91,6 +97,89 @@ SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT,
   setRequiresStructuredCFG(false);
 }
 
+namespace {
+  enum AddressSpace {
+    Function = storageClassToAddressSpace(SPIRV::StorageClass::Function),
+    CrossWorkgroup =
+        storageClassToAddressSpace(SPIRV::StorageClass::CrossWorkgroup),
+    UniformConstant =
+        storageClassToAddressSpace(SPIRV::StorageClass::UniformConstant),
+    Workgroup = storageClassToAddressSpace(SPIRV::StorageClass::Workgroup),
+    Generic = storageClassToAddressSpace(SPIRV::StorageClass::Generic)
+  };
+}
+
+unsigned SPIRVTargetMachine::getAssumedAddrSpace(const Value *V) const {
+  const auto *LD = dyn_cast<LoadInst>(V);
+  if (!LD)
+    return UINT32_MAX;
+
+  // It must be a load from a pointer to Generic.
+  assert(V->getType()->isPointerTy() &&
+         V->getType()->getPointerAddressSpace() == AddressSpace::Generic);
+
+  const auto *Ptr = LD->getPointerOperand();
+  if (Ptr->getType()->getPointerAddressSpace() != AddressSpace::UniformConstant)
+    return UINT32_MAX;
+  // For a loaded from a pointer to UniformConstant, we can infer CrossWorkgroup
+  // storage, as this could only have been legally initialised with a
+  // CrossWorkgroup (aka device) constant pointer.
+  return AddressSpace::CrossWorkgroup;
+}
+
+std::pair<const Value *, unsigned>
+SPIRVTargetMachine::getPredicatedAddrSpace(const Value *V) const {
+  using namespace PatternMatch;
+
+  if (auto *II = dyn_cast<IntrinsicInst>(V)) {
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::amdgcn_is_shared:
+      return std::pair(II->getArgOperand(0), AddressSpace::Workgroup);
+    case Intrinsic::amdgcn_is_private:
+      return std::pair(II->getArgOperand(0), AddressSpace::Function);
+    default:
+      break;
+    }
+    return std::pair(nullptr, UINT32_MAX);
+  }
+  // Check the global pointer predication based on
+  // (!is_share(p) && !is_private(p)). Note that logic 'and' is commutative and
+  // the order of 'is_shared' and 'is_private' is not significant.
+  Value *Ptr;
+  if (getTargetTriple().getVendor() == Triple::VendorType::AMD &&
+      match(
+        const_cast<Value *>(V),
+        m_c_And(m_Not(m_Intrinsic<Intrinsic::amdgcn_is_shared>(m_Value(Ptr))),
+                m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(m_Deferred(Ptr))))))
+    return std::pair(Ptr, AddressSpace::CrossWorkgroup);
+
+  return std::pair(nullptr, UINT32_MAX);
+}
+
+bool SPIRVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
+                                             unsigned DestAS) const {
+  if (SrcAS != AddressSpace::Generic && SrcAS != AddressSpace::CrossWorkgroup)
+    return false;
+  return DestAS == AddressSpace::Generic ||
+         DestAS == AddressSpace::CrossWorkgroup;
+}
+
+void SPIRVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
+  PB.registerCGSCCOptimizerLateEPCallback([](CGSCCPassManager &PM,
+                                             OptimizationLevel Level) {
+    if (Level == OptimizationLevel::O0)
+      return;
+
+    FunctionPassManager FPM;
+
+    // Add infer address spaces pass to the opt pipeline after inlining
+    // but before SROA to increase SROA opportunities.
+    FPM.addPass(InferAddressSpacesPass(AddressSpace::Generic));
+
+    PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
+  });
+}
+
 namespace {
 // SPIR-V Code Generator Pass Configuration Options.
 class SPIRVPassConfig : public TargetPassConfig {
@@ -178,6 +267,9 @@ void SPIRVPassConfig::addIRPasses() {
     addPass(createSPIRVStructurizerPass());
   }
 
+  if (TM.getOptLevel() > CodeGenOptLevel::None)
+    addPass(createInferAddressSpacesPass(AddressSpace::Generic));
+
   addPass(createSPIRVRegularizerPass());
   addPass(createSPIRVPrepareFunctionsPass(TM));
   addPass(createSPIRVStripConvergenceIntrinsicsPass());
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h
index a1a9f26846153b..24b09febb9d184 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h
@@ -43,6 +43,13 @@ class SPIRVTargetMachine : public LLVMTargetMachine {
   TargetLoweringObjectFile *getObjFileLowering() const override {
     return TLOF.get();
   }
+
+  unsigned getAssumedAddrSpace(const Value *V) const override;
+  std::pair<const Value *, unsigned>
+  getPredicatedAddrSpace(const Value *V) const override;
+  bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DstAS) const override;
+
+  void registerPassBuilderCallbacks(PassBuilder &PB) override;
 };
 } // namespace llvm
 
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
index 24047f31fab290..295c0ceeade839 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
@@ -39,6 +39,10 @@ class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> {
       : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
         TLI(ST->getTargetLowering()) {}
 
+  unsigned getFlatAddressSpace() const {
+    return storageClassToAddressSpace(SPIRV::StorageClass::Generic);
+  }
+
   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
     // SPIR-V natively supports OpBitcount, per 3.53.14 in the spec, as such it
     // is reasonable to assume the Op is fast / preferable to the expanded loop.
diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll
new file mode 100644
index 00000000000000..9b65ff44f288f2
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces -o - %s | FileCheck %s
+; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces -o - %s | FileCheck %s
+
+@c0 = addrspace(2) global ptr undef
+
+; CHECK-LABEL: @generic_ptr_from_constant
+; CHECK: addrspacecast ptr addrspace(4) %p to ptr addrspace(1)
+; CHECK-NEXT: load float, ptr addrspace(1)
+define spir_func float @generic_ptr_from_constant() {
+  %p = load ptr addrspace(4), ptr addrspace(2) @c0
+  %v = load float, ptr addrspace(4) %p
+  ret float %v
+}
+
+%struct.S = type { ptr addrspace(4), ptr addrspace(4) }
+
+; CHECK-LABEL: @generic_ptr_from_aggregate_argument
+; CHECK: addrspacecast ptr addrspace(4) %p0 to ptr addrspace(1)
+; CHECK: addrspacecast ptr addrspace(4) %p1 to ptr addrspace(1)
+; CHECK: load i32, ptr addrspace(1)
+; CHECK: store float %v1, ptr addrspace(1)
+; CHECK: ret
+define spir_kernel void @generic_ptr_from_aggregate_argument(ptr addrspace(2) byval(%struct.S) align 8 %0) {
+  %p0 = load ptr addrspace(4), ptr addrspace(2) %0
+  %f1 = getelementptr inbounds %struct.S, ptr addrspace(2) %0, i64 0, i32 1
+  %p1 = load ptr addrspace(4), ptr addrspace(2) %f1
+  %v0 = load i32, ptr addrspace(4) %p0
+  %v1 = sitofp i32 %v0 to float
+  store float %v1, ptr addrspace(4) %p1
+  ret void
+}
diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll
new file mode 100644
index 00000000000000..75b23aa30349af
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll
@@ -0,0 +1,236 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s
+; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s
+
+; Trivial optimization of generic addressing
+
+define float @load_global_from_flat(ptr addrspace(4) %generic_scalar) #0 {
+; CHECK-LABEL: define float @load_global_from_flat(
+; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(1)
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(1) [[TMP0]], align 4
+; CHECK-NEXT:    ret float [[TMP1]]
+;
+  %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(1)
+  %tmp1 = load float, ptr addrspace(1) %tmp0
+  ret float %tmp1
+}
+
+define float @load_group_from_flat(ptr addrspace(4) %generic_scalar) #0 {
+; CHECK-LABEL: define float @load_group_from_flat(
+; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(3)
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(3) [[TMP0]], align 4
+; CHECK-NEXT:    ret float [[TMP1]]
+;
+  %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(3)
+  %tmp1 = load float, ptr addrspace(3) %tmp0
+  ret float %tmp1
+}
+
+define float @load_private_from_flat(ptr addrspace(4) %generic_scalar) #0 {
+; CHECK-LABEL: define float @load_private_from_flat(
+; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[TMP0]], align 4
+; CHECK-NEXT:    ret float [[TMP1]]
+;
+  %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr
+  %tmp1 = load float, ptr %tmp0
+  ret float %tmp1
+}
+
+define spir_kernel void @store_global_from_flat(ptr addrspace(4) %generic_scalar) #0 {
+; CHECK-LABEL: define spir_kernel void @store_global_from_flat(
+; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(1)
+; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(1) [[TMP0]], align 4
+; CHECK-NEXT:    ret void
+;
+  %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(1)
+  store float 0.0, ptr addrspace(1) %tmp0
+  ret void
+}
+
+define spir_kernel void @store_group_from_flat(ptr addrspace(4) %generic_scalar) #0 {
+; CHECK-LABEL: define spir_kernel void @store_group_from_flat(
+; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(3)
+; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(3) [[TMP0]], align 4
+; CHECK-NEXT:    ret void
+;
+  %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(3)
+  store float 0.0, ptr addrspace(3) %tmp0
+  ret void
+}
+
+define spir_kernel void @store_private_from_flat(ptr addrspace(4) %generic_scalar) #0 {
+; CHECK-LABEL: de...
[truncated]

github-actions · 2024-10-02T17:23:21Z

✅ With the latest revision this PR passed the C/C++ code formatter.

arsenm · 2024-10-02T17:44:13Z

llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp

+unsigned SPIRVTargetMachine::getAssumedAddrSpace(const Value *V) const {
+  const auto *LD = dyn_cast<LoadInst>(V);
+  if (!LD)
+    return UINT32_MAX;
+
+  // It must be a load from a pointer to Generic.
+  assert(V->getType()->isPointerTy() &&
+         V->getType()->getPointerAddressSpace() == AddressSpace::Generic);
+
+  const auto *Ptr = LD->getPointerOperand();
+  if (Ptr->getType()->getPointerAddressSpace() != AddressSpace::UniformConstant)
+    return UINT32_MAX;
+  // For a loaded from a pointer to UniformConstant, we can infer CrossWorkgroup
+  // storage, as this could only have been legally initialised with a
+  // CrossWorkgroup (aka device) constant pointer.
+  return AddressSpace::CrossWorkgroup;
+}
+
+std::pair<const Value *, unsigned>
+SPIRVTargetMachine::getPredicatedAddrSpace(const Value *V) const {
+  using namespace PatternMatch;
+
+  if (auto *II = dyn_cast<IntrinsicInst>(V)) {
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::amdgcn_is_shared:
+      return std::pair(II->getArgOperand(0), AddressSpace::Workgroup);
+    case Intrinsic::amdgcn_is_private:
+      return std::pair(II->getArgOperand(0), AddressSpace::Function);
+    default:
+      break;
+    }
+    return std::pair(nullptr, UINT32_MAX);
+  }
+  // Check the global pointer predication based on
+  // (!is_share(p) && !is_private(p)). Note that logic 'and' is commutative and
+  // the order of 'is_shared' and 'is_private' is not significant.
+  Value *Ptr;
+  if (getTargetTriple().getVendor() == Triple::VendorType::AMD &&
+      match(
+          const_cast<Value *>(V),
+          m_c_And(m_Not(m_Intrinsic<Intrinsic::amdgcn_is_shared>(m_Value(Ptr))),
+                m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(
+                    m_Deferred(Ptr))))))
+    return std::pair(Ptr, AddressSpace::CrossWorkgroup);
+
+  return std::pair(nullptr, UINT32_MAX);
+}


This is the fancy stuff that should go into a follow up patch to add assume support

arsenm · 2024-10-02T17:44:32Z

llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp

+          m_c_And(m_Not(m_Intrinsic<Intrinsic::amdgcn_is_shared>(m_Value(Ptr))),
+                m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(
+                    m_Deferred(Ptr))))))


Shouldn't be looking at the amdgcn intrinsics? Surely spirv has its own operations for this?

There's AMDGCN flavoured SPIR-V, which'd possibly have these in source; I don't think there's AS predicates in SPIR-V, at least not AFAICS in Clang/LLVM/the spec - happy to add them if they exist, but we'll need both.

If I have skimmed SPIRV correctly, it expects invalid addrspacecasts (OpGenericCastToPtrExplicit) to return null. You could implement the same kind of check by looking for icmp ne (addrspacecast x to y), null

Neither the BE nor the Translator handle that at the moment, and I suspect it's meant for implementing some specific bit of OpenCL (SYCL?) functionality. We use the non-explicit flavours, and those don't return null (and are diagnosed as illegal if they are illegal per spec). This is probably a good way of implementing the predicates / handling this, so thank you for it. Having said that, I reiterate that we have AMDGCN flavoured SPIR-V where the actual AMDGCN predicates would manifest / make sense.

We could do the same thing for amdgpu. We implement addrspacecast with the same operations.

This also reminds me, we should have a valid flag on addrspacecast.

If I have skimmed SPIRV correctly, it expects invalid addrspacecasts (OpGenericCastToPtrExplicit) to return null. You could implement the same kind of check by looking for icmp ne (addrspacecast x to y), null

The semantic described in Langref doesn't allow you to map addrspacecast to OpGenericCastToPtrExplicit as no check is performed. OpGenericCastToPtr is what I expect addrspacecast to be mapped to.

I do think we need to add a poison-if-known-invalid-cast flag to addrspacecast

I do think we need to add a poison-if-known-invalid-cast flag to addrspacecast

+1 (so does SPIR-V but that's another story)

If I have skimmed SPIRV correctly, it expects invalid addrspacecasts (OpGenericCastToPtrExplicit) to return null. You could implement the same kind of check by looking for icmp ne (addrspacecast x to y), null

The semantic described in Langref doesn't allow you to map addrspacecast to OpGenericCastToPtrExplicit as no check is performed. OpGenericCastToPtr is what I expect addrspacecast to be mapped to.

I think that we just need to implement the AS predicates (is_local / is_private & friends) atop OpGenericPtrMemSemantics; not as part of this patch but in general, orthogonally to adding poison-if-known-invalid-cast.

arsenm · 2024-10-02T17:44:53Z

llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp

+  Generic = storageClassToAddressSpace(SPIRV::StorageClass::Generic)
+};
+
+unsigned SPIRVTargetMachine::getAssumedAddrSpace(const Value *V) const {


Move to separate change, not sure this is necessarily valid for spirv

UniformConstant is pretty much OCL constant (with a bit of handwavium around initializers being allowed depending on an undefined client API). This is just saying that if you have a load from that, and you're loading a pointer, that pointer can only point to global (CrossWorkgroup), which I think holds here as well because there's no legal way to put a private or a local (shared) pointer in there (if you do it at static init, before a kernel executes, you cannot form those types of addresses, if you do it as the kernel executes it's UB). Or are you worried about cases where global does not include constant?

arsenm · 2024-10-02T17:45:23Z

llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp

+bool SPIRVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
+                                             unsigned DestAS) const {
+  if (SrcAS != AddressSpace::Generic && SrcAS != AddressSpace::CrossWorkgroup)
+    return false;
+  return DestAS == AddressSpace::Generic ||
+         DestAS == AddressSpace::CrossWorkgroup;
+}


This is separate, I don't think InferAddressSpaces relies on this

It does, please see isNoopPtrIntCastPair in its implementation.

arsenm · 2024-10-02T17:46:09Z

llvm/test/Transforms/InferAddressSpaces/SPIRV/self-phi.ll

@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py


You don't need to duplicate all of these tests. You just need some basic samples that the target is implemented, the full set is testing pass mechanics which can be done on any target

arsenm · 2024-10-02T17:46:38Z

llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp

@@ -178,6 +266,9 @@ void SPIRVPassConfig::addIRPasses() {
    addPass(createSPIRVStructurizerPass());
  }

+  if (TM.getOptLevel() > CodeGenOptLevel::None)
+    addPass(createInferAddressSpacesPass(AddressSpace::Generic));


Not sure why this is a pass parameter to InferAddressSpaces, and a TTI hook

Because if one invokes the pass directly via opt there's no way but the TTI query to set Flat/Generic to anything but 0, and because making it explicit at the point of construction rather than relying on that seems somewhat more self documenting.

…ras_for_spirv

arsenm

Should restrict this to just adding the basic pass, without the fancy assumed address space or assume handling. Leave those for later.

Also don't duplicate every test. These are mostly structural tests for the pass that do not should not be duplicated in every target. Just add a simple test with the basics to show the pass runs

AlexVlx · 2024-10-09T19:27:30Z

Should restrict this to just adding the basic pass, without the fancy assumed address space or assume handling. Leave those for later.

Any particular reason for this, asides from the concern around constant / UniformConstant? I'll re-iterate that for AMDGCN flavoured SPIR-V it should do exactly what we do in AMDGPU, so punting in general seems counter-intuitive.

…ras_for_spirv

AlexVlx

Move to separate change, not sure this is necessarily valid for spirv

I think that I'd prefer to keep this around, definitely for AMDGCNSPIRV where we know it is both correct and empirically beneficial. For vanilla SPIR-V I'll defer to folks on that side - I cannot think about cases where it'd be legal to put anything but a pointer to global (CrossWorkgroup) in constant (UniformConstant), but that might simply be ignorance on my part.

…ras_for_spirv

arsenm · 2024-10-14T16:59:49Z

llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp

+}
+
+std::pair<const Value *, unsigned>
+SPIRVTargetMachine::getPredicatedAddrSpace(const Value *V) const {


Drop this part from the patch, it's not tested and is questionable enough to do separately

llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp

…ras_for_spirv

AlexVlx added 2 commits October 2, 2024 11:18

Enable InferAddressSpaces for SPIR-V.

9f3cac4

Merge branch 'main' of https://github.com/llvm/llvm-project into infe…

fcab1dd

…ras_for_spirv

llvmbot added clang Clang issues not falling into any other category backend:AMDGPU backend:SPIR-V llvm:transforms labels Oct 2, 2024

AlexVlx requested review from michalpaszkowski, arsenm, VyacheslavLevytskyy and yxsamliu October 2, 2024 17:19

AlexVlx added tools:opt llvm:optimizations labels Oct 2, 2024

Fix formatting.

dc1a5f5

arsenm reviewed Oct 2, 2024

View reviewed changes

VyacheslavLevytskyy requested a review from Keenuts October 2, 2024 18:21

Merge branch 'main' of https://github.com/llvm/llvm-project into infe…

d5483cd

…ras_for_spirv

arsenm requested changes Oct 9, 2024

View reviewed changes

AlexVlx added 2 commits October 12, 2024 02:48

Merge branch 'main' of https://github.com/llvm/llvm-project into infe…

a28ff5d

…ras_for_spirv

Reduce set of tests.

31a5ebe

AlexVlx commented Oct 11, 2024

View reviewed changes

AlexVlx added 4 commits October 12, 2024 03:51

Fix formatting.

a01e1bc

Fix inclusion ordering.

ab1fb66

Only enable "fancy" stuff fof amdgcnspirv for now.

168149a

Merge branch 'main' of https://github.com/llvm/llvm-project into infe…

e1e57ad

…ras_for_spirv

arsenm reviewed Oct 14, 2024

View reviewed changes

llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp Outdated Show resolved Hide resolved

Merge branch 'main' of https://github.com/llvm/llvm-project into infe…

102e886

…ras_for_spirv

AlexVlx added 3 commits October 22, 2024 14:54

Merge branch 'main' of https://github.com/llvm/llvm-project into infe…

797a80a

…ras_for_spirv

Remove spurious target check, clarify comment.

a7d1467

Merge branch 'main' of https://github.com/llvm/llvm-project into infe…

770afb8

…ras_for_spirv

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[llvm][opt][Transforms][SPIR-V] Enable `InferAddressSpaces` for SPIR-V #110897

[llvm][opt][Transforms][SPIR-V] Enable `InferAddressSpaces` for SPIR-V #110897

AlexVlx commented Oct 2, 2024

llvmbot commented Oct 2, 2024 •

edited

Loading

github-actions bot commented Oct 2, 2024 •

edited

Loading

arsenm Oct 2, 2024

arsenm Oct 2, 2024

AlexVlx Oct 2, 2024

arsenm Oct 9, 2024

AlexVlx Oct 9, 2024

arsenm Oct 9, 2024

Naghasan Oct 16, 2024

arsenm Oct 16, 2024

Naghasan Oct 16, 2024

AlexVlx Oct 22, 2024

arsenm Oct 2, 2024

AlexVlx Oct 2, 2024

arsenm Oct 2, 2024

AlexVlx Oct 2, 2024

arsenm Oct 2, 2024

arsenm Oct 2, 2024

AlexVlx Oct 2, 2024

arsenm left a comment

AlexVlx commented Oct 9, 2024

AlexVlx left a comment

arsenm Oct 14, 2024

		@@ -0,0 +1,29 @@
		; NOTE: Assertions have been autogenerated by utils/update_test_checks.py

[llvm][opt][Transforms][SPIR-V] Enable InferAddressSpaces for SPIR-V #110897

Are you sure you want to change the base?

[llvm][opt][Transforms][SPIR-V] Enable InferAddressSpaces for SPIR-V #110897

Conversation

AlexVlx commented Oct 2, 2024

llvmbot commented Oct 2, 2024 • edited Loading

github-actions bot commented Oct 2, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

arsenm left a comment

Choose a reason for hiding this comment

AlexVlx commented Oct 9, 2024

AlexVlx left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

[llvm][opt][Transforms][SPIR-V] Enable `InferAddressSpaces` for SPIR-V #110897

[llvm][opt][Transforms][SPIR-V] Enable `InferAddressSpaces` for SPIR-V #110897

llvmbot commented Oct 2, 2024 •

edited

Loading

github-actions bot commented Oct 2, 2024 •

edited

Loading