Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PR for llvm/llvm-project#54560 #187

Open
wants to merge 3 commits into
base: release/14.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions clang/include/clang/AST/ASTContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -3279,11 +3279,11 @@ OPT_LIST(V)
/// Return a new OMPTraitInfo object owned by this context.
OMPTraitInfo &getNewOMPTraitInfo();

/// Whether a C++ static variable may be externalized.
bool mayExternalizeStaticVar(const Decl *D) const;
/// Whether a C++ static variable or CUDA/HIP kernel may be externalized.
bool mayExternalize(const Decl *D) const;

/// Whether a C++ static variable should be externalized.
bool shouldExternalizeStaticVar(const Decl *D) const;
/// Whether a C++ static variable or CUDA/HIP kernel should be externalized.
bool shouldExternalize(const Decl *D) const;

StringRef getCUIDHash() const;

Expand Down
24 changes: 14 additions & 10 deletions clang/lib/AST/ASTContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11331,7 +11331,7 @@ static GVALinkage adjustGVALinkageForAttributes(const ASTContext &Context,
// name between the host and device compilation which is the same for the
// same compilation unit whereas different among different compilation
// units.
if (Context.shouldExternalizeStaticVar(D))
if (Context.shouldExternalize(D))
return GVA_StrongExternal;
}
return L;
Expand Down Expand Up @@ -12258,22 +12258,26 @@ operator<<(const StreamingDiagnostic &DB,
return DB << "a prior #pragma section";
}

bool ASTContext::mayExternalizeStaticVar(const Decl *D) const {
bool ASTContext::mayExternalize(const Decl *D) const {
bool IsStaticVar =
isa<VarDecl>(D) && cast<VarDecl>(D)->getStorageClass() == SC_Static;
bool IsExplicitDeviceVar = (D->hasAttr<CUDADeviceAttr>() &&
!D->getAttr<CUDADeviceAttr>()->isImplicit()) ||
(D->hasAttr<CUDAConstantAttr>() &&
!D->getAttr<CUDAConstantAttr>()->isImplicit());
// CUDA/HIP: static managed variables need to be externalized since it is
// a declaration in IR, therefore cannot have internal linkage.
return IsStaticVar &&
(D->hasAttr<HIPManagedAttr>() || IsExplicitDeviceVar);
}

bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const {
return mayExternalizeStaticVar(D) &&
(D->hasAttr<HIPManagedAttr>() ||
// a declaration in IR, therefore cannot have internal linkage. Kernels in
// anonymous name space needs to be externalized to avoid duplicate symbols.
return (IsStaticVar &&
(D->hasAttr<HIPManagedAttr>() || IsExplicitDeviceVar)) ||
(D->hasAttr<CUDAGlobalAttr>() &&
basicGVALinkageForFunction(*this, cast<FunctionDecl>(D)) ==
GVA_Internal);
}

bool ASTContext::shouldExternalize(const Decl *D) const {
return mayExternalize(D) &&
(D->hasAttr<HIPManagedAttr>() || D->hasAttr<CUDAGlobalAttr>() ||
CUDADeviceVarODRUsedByHost.count(cast<VarDecl>(D)));
}

Expand Down
4 changes: 2 additions & 2 deletions clang/lib/CodeGen/CGCUDANV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,13 +281,13 @@ std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
DeviceSideName = std::string(ND->getIdentifier()->getName());

// Make unique name for device side static file-scope variable for HIP.
if (CGM.getContext().shouldExternalizeStaticVar(ND) &&
if (CGM.getContext().shouldExternalize(ND) &&
CGM.getLangOpts().GPURelocatableDeviceCode &&
!CGM.getLangOpts().CUID.empty()) {
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
Out << DeviceSideName;
CGM.printPostfixForExternalizedStaticVar(Out);
CGM.printPostfixForExternalizedDecl(Out, ND);
DeviceSideName = std::string(Out.str());
}
return DeviceSideName;
Expand Down
22 changes: 14 additions & 8 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1364,10 +1364,10 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
}

// Make unique name for device side static file-scope variable for HIP.
if (CGM.getContext().shouldExternalizeStaticVar(ND) &&
if (CGM.getContext().shouldExternalize(ND) &&
CGM.getLangOpts().GPURelocatableDeviceCode &&
CGM.getLangOpts().CUDAIsDevice && !CGM.getLangOpts().CUID.empty())
CGM.printPostfixForExternalizedStaticVar(Out);
CGM.printPostfixForExternalizedDecl(Out, ND);
return std::string(Out.str());
}

Expand Down Expand Up @@ -1434,8 +1434,7 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
// static device variable depends on whether the variable is referenced by
// a host or device host function. Therefore the mangled name cannot be
// cached.
if (!LangOpts.CUDAIsDevice ||
!getContext().mayExternalizeStaticVar(GD.getDecl())) {
if (!LangOpts.CUDAIsDevice || !getContext().mayExternalize(GD.getDecl())) {
auto FoundName = MangledDeclNames.find(CanonicalGD);
if (FoundName != MangledDeclNames.end())
return FoundName->second;
Expand All @@ -1455,7 +1454,7 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
// directly between host- and device-compilations, the host- and
// device-mangling in host compilation could help catching certain ones.
assert(!isa<FunctionDecl>(ND) || !ND->hasAttr<CUDAGlobalAttr>() ||
getLangOpts().CUDAIsDevice ||
getContext().shouldExternalize(ND) || getLangOpts().CUDAIsDevice ||
(getContext().getAuxTargetInfo() &&
(getContext().getAuxTargetInfo()->getCXXABI() !=
getContext().getTargetInfo().getCXXABI())) ||
Expand Down Expand Up @@ -6645,7 +6644,14 @@ bool CodeGenModule::stopAutoInit() {
return false;
}

void CodeGenModule::printPostfixForExternalizedStaticVar(
llvm::raw_ostream &OS) const {
OS << "__static__" << getContext().getCUIDHash();
void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS,
const Decl *D) const {
StringRef Tag;
// ptxas does not allow '.' in symbol names. On the other hand, HIP prefers
// postfix beginning with '.' since the symbol name can be demangled.
if (LangOpts.HIP)
Tag = (isa<VarDecl>(D) ? ".static." : ".intern.");
else
Tag = (isa<VarDecl>(D) ? "__static__" : "__intern__");
OS << Tag << getContext().getCUIDHash();
}
7 changes: 4 additions & 3 deletions clang/lib/CodeGen/CodeGenModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -1447,9 +1447,10 @@ class CodeGenModule : public CodeGenTypeCache {
TBAAAccessInfo *TBAAInfo = nullptr);
bool stopAutoInit();

/// Print the postfix for externalized static variable for single source
/// offloading languages CUDA and HIP.
void printPostfixForExternalizedStaticVar(llvm::raw_ostream &OS) const;
/// Print the postfix for externalized static variable or kernels for single
/// source offloading languages CUDA and HIP.
void printPostfixForExternalizedDecl(llvm::raw_ostream &OS,
const Decl *D) const;

private:
llvm::Constant *GetOrCreateLLVMFunction(
Expand Down
31 changes: 23 additions & 8 deletions clang/test/CodeGenCUDA/device-var-linkage.cu
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device \
// RUN: -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefixes=DEV,NORDC %s
// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device \
// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefixes=DEV,RDC %s
// RUN: %clang_cc1 -triple nvptx \
// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux \
// RUN: -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefixes=HOST,NORDC-H %s
// RUN: %clang_cc1 -triple nvptx \
// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux \
// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefixes=HOST,RDC-H %s
// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - %s \
// RUN: | FileCheck -check-prefixes=CUDA %s

#include "Inputs/cuda.h"

Expand All @@ -24,7 +27,9 @@ __constant__ int v2;
// DEV-DAG: @v3 = addrspace(1) externally_initialized global i32 addrspace(1)* null
// NORDC-H-DAG: @v3 = internal externally_initialized global i32* null
// RDC-H-DAG: @v3 = externally_initialized global i32* null
#if __HIP__
__managed__ int v3;
#endif

// DEV-DAG: @ev1 = external addrspace(1) global i32
// HOST-DAG: @ev1 = external global i32
Expand All @@ -34,25 +39,35 @@ extern __device__ int ev1;
extern __constant__ int ev2;
// DEV-DAG: @ev3 = external addrspace(1) externally_initialized global i32 addrspace(1)*
// HOST-DAG: @ev3 = external externally_initialized global i32*
#if __HIP__
extern __managed__ int ev3;
#endif

// NORDC-DAG: @_ZL3sv1 = addrspace(1) externally_initialized global i32 0
// RDC-DAG: @_ZL3sv1__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
// RDC-DAG: @_ZL3sv1.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
// HOST-DAG: @_ZL3sv1 = internal global i32 undef
// CUDA-DAG: @_ZL3sv1__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
static __device__ int sv1;
// NORDC-DAG: @_ZL3sv2 = addrspace(4) externally_initialized global i32 0
// RDC-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized global i32 0
// RDC-DAG: @_ZL3sv2.static.[[HASH]] = addrspace(4) externally_initialized global i32 0
// HOST-DAG: @_ZL3sv2 = internal global i32 undef
// CUDA-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized global i32 0
static __constant__ int sv2;
// NORDC-DAG: @_ZL3sv3 = addrspace(1) externally_initialized global i32 addrspace(1)* null
// RDC-DAG: @_ZL3sv3__static__[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
// RDC-DAG: @_ZL3sv3.static.[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
// HOST-DAG: @_ZL3sv3 = internal externally_initialized global i32* null
#if __HIP__
static __managed__ int sv3;
#endif

__device__ __host__ int work(int *x);

__device__ __host__ int fun1() {
return work(&ev1) + work(&ev2) + work(&ev3) + work(&sv1) + work(&sv2) + work(&sv3);
return work(&ev1) + work(&ev2) + work(&sv1) + work(&sv2)
#if __HIP__
+ work(&ev3) + work(&sv3)
#endif
;
}

// HOST: hipRegisterVar({{.*}}@v1
Expand Down
58 changes: 58 additions & 0 deletions clang/test/CodeGenCUDA/kernel-in-anon-ns.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \
// RUN: -emit-llvm -o - -x hip %s > %t.dev

// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
// RUN: -aux-triple amdgcn-amd-amdhsa -std=c++11 -fgpu-rdc \
// RUN: -emit-llvm -o - -x hip %s > %t.host

// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=HIP,COMMON %s

// RUN: echo "GPU binary" > %t.fatbin

// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -cuid=abc \
// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \
// RUN: -emit-llvm -o - %s > %t.dev

// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
// RUN: -aux-triple nvptx -std=c++11 -fgpu-rdc -fcuda-include-gpubinary %t.fatbin \
// RUN: -emit-llvm -o - %s > %t.host

// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=CUDA,COMMON %s

#include "Inputs/cuda.h"

// HIP-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv\.intern\.b04fd23c98500190]](
// HIP-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]](
// HIP-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]](

// CUDA-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]](
// CUDA-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT___intern__b04fd23c98500190]](
// CUDA-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT___intern__b04fd23c98500190]](

// COMMON-DAG: @[[STR1:.*]] = {{.*}} c"[[KERN1]]\00"
// COMMON-DAG: @[[STR2:.*]] = {{.*}} c"[[KERN2]]\00"
// COMMON-DAG: @[[STR3:.*]] = {{.*}} c"[[KERN3]]\00"

// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR1]]
// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR2]]
// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR3]]


template <typename T>
__global__ void tempKern(T x) {}

namespace {
__global__ void kernel() {}
struct X {};
X x;
auto lambda = [](){};
}

void test() {
kernel<<<1, 1>>>();

tempKern<<<1, 1>>>(x);

tempKern<<<1, 1>>>(lambda);
}
12 changes: 5 additions & 7 deletions clang/test/CodeGenCUDA/managed-var.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
// REQUIRES: x86-registered-target, amdgpu-registered-target

// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
// RUN: -emit-llvm -o - -x hip %s | FileCheck \
// RUN: -check-prefixes=COMMON,DEV,NORDC-D %s
Expand Down Expand Up @@ -52,15 +50,15 @@ extern __managed__ int ex;

// NORDC-D-DAG: @_ZL2sx.managed = addrspace(1) externally_initialized global i32 1, align 4
// NORDC-D-DAG: @_ZL2sx = addrspace(1) externally_initialized global i32 addrspace(1)* null
// RDC-D-DAG: @_ZL2sx__static__[[HASH:.*]].managed = addrspace(1) externally_initialized global i32 1, align 4
// RDC-D-DAG: @_ZL2sx__static__[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
// RDC-D-DAG: @_ZL2sx.static.[[HASH:.*]].managed = addrspace(1) externally_initialized global i32 1, align 4
// RDC-D-DAG: @_ZL2sx.static.[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
// HOST-DAG: @_ZL2sx.managed = internal global i32 1
// HOST-DAG: @_ZL2sx = internal externally_initialized global i32* null
// NORDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx\00"
// RDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx__static__[[HASH:.*]]\00"
// RDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH:.*]]\00"

// POSTFIX: @_ZL2sx__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
// POSTFIX: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx__static__[[HASH]]\00"
// POSTFIX: @_ZL2sx.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
// POSTFIX: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH]]\00"
static __managed__ int sx = 1;

// DEV-DAG: @llvm.compiler.used
Expand Down
18 changes: 12 additions & 6 deletions clang/test/CodeGenCUDA/static-device-var-rdc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@
// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host
// RUN: cat %t.host | FileCheck -check-prefix=HOST-NEG %s

// Check postfix for CUDA.

// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -cuid=abc \
// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - %s | FileCheck \
// RUN: -check-prefixes=CUDA %s

#include "Inputs/cuda.h"

Expand All @@ -55,11 +60,12 @@
// INT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00"

// Test externalized static device variables
// EXT-DEV-DAG: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x__static__[[HASH:.*]]\00"
// EXT-DEV-DAG: @_ZL1x.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH:.*]]\00"
// CUDA-DAG: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0

// POSTFIX: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
// POSTFIX: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x__static__[[HASH]]\00"
// POSTFIX: @_ZL1x.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
// POSTFIX: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH]]\00"

static __device__ int x;

Expand All @@ -73,8 +79,8 @@ static __device__ int x2;
// INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00"

// Test externalized static device variables
// EXT-DEV-DAG: @_ZL1y__static__[[HASH]] = addrspace(4) externally_initialized global i32 0
// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y__static__[[HASH]]\00"
// EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = addrspace(4) externally_initialized global i32 0
// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y.static.[[HASH]]\00"

static __constant__ int y;

Expand Down