Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HLSL] set alwaysinline on HLSL functions #106588

Merged
merged 12 commits into from
Sep 17, 2024
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CGHLSLRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes(
NumThreadsAttr->getZ());
Fn->addFnAttr(NumThreadsKindStr, NumThreadsStr);
}
Fn->addFnAttr(llvm::Attribute::NoInline);
}

static Value *buildVectorInput(IRBuilder<> &B, Function *F, llvm::Type *Ty) {
Expand Down
20 changes: 14 additions & 6 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2471,11 +2471,14 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
B.addAttribute(llvm::Attribute::StackProtectReq);

if (!D) {
// Non-entry HLSL functions must always be inlined.
if (getLangOpts().HLSL && !F->hasFnAttribute(llvm::Attribute::NoInline))
B.addAttribute(llvm::Attribute::AlwaysInline);
// If we don't have a declaration to control inlining, the function isn't
// explicitly marked as alwaysinline for semantic reasons, and inlining is
// disabled, mark the function as noinline.
if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) &&
CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining)
else if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) &&
CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining)
B.addAttribute(llvm::Attribute::NoInline);

F->addFnAttrs(B);
Expand All @@ -2502,9 +2505,13 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
ShouldAddOptNone &= !D->hasAttr<MinSizeAttr>();
ShouldAddOptNone &= !D->hasAttr<AlwaysInlineAttr>();

// Add optnone, but do so only if the function isn't always_inline.
if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&
!F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
// Non-entry HLSL functions must always be inlined.
if (getLangOpts().HLSL && !F->hasFnAttribute(llvm::Attribute::NoInline) &&
!D->hasAttr<NoInlineAttr>()) {
B.addAttribute(llvm::Attribute::AlwaysInline);
} else if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&
!F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
// Add optnone, but do so only if the function isn't always_inline.
B.addAttribute(llvm::Attribute::OptimizeNone);

// OptimizeNone implies noinline; we should not be inlining such functions.
Expand All @@ -2524,7 +2531,8 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
B.addAttribute(llvm::Attribute::NoInline);
} else if (D->hasAttr<NoDuplicateAttr>()) {
B.addAttribute(llvm::Attribute::NoDuplicate);
} else if (D->hasAttr<NoInlineAttr>() && !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
} else if (D->hasAttr<NoInlineAttr>() &&
!F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
// Add noinline if the function isn't always_inline.
B.addAttribute(llvm::Attribute::NoInline);
} else if (D->hasAttr<AlwaysInlineAttr>() &&
Expand Down
31 changes: 21 additions & 10 deletions clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE

int i;

Expand All @@ -7,7 +8,7 @@ __attribute__((constructor)) void call_me_first(void) {
}

__attribute__((constructor)) void then_call_me(void) {
i = 12;
i = 13;
}

__attribute__((destructor)) void call_me_last(void) {
Expand All @@ -21,11 +22,21 @@ void main(unsigned GI : SV_GroupIndex) {}
// CHECK-NOT:@llvm.global_ctors
// CHECK-NOT:@llvm.global_dtors

//CHECK: define void @main()
//CHECK-NEXT: entry:
//CHECK-NEXT: call void @"?call_me_first@@YAXXZ"()
//CHECK-NEXT: call void @"?then_call_me@@YAXXZ"()
//CHECK-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
//CHECK-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
//CHECK-NEXT: call void @"?call_me_last@@YAXXZ"(
//CHECK-NEXT: ret void
// CHECK: define void @main()
// CHECK-NEXT: entry:
// Verify function constructors are emitted
// NOINLINE-NEXT: call void @"?call_me_first@@YAXXZ"()
// NOINLINE-NEXT: call void @"?then_call_me@@YAXXZ"()
// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
// NOINLINE-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
// NOINLINE-NEXT: call void @"?call_me_last@@YAXXZ"(
// NOINLINE-NEXT: ret void

// Verify constructor calls are inlined when AlwaysInline is run
// INLINE-NEXT: alloca
// INLINE-NEXT: store i32 12
// INLINE-NEXT: store i32 13
// INLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
// INLINE-NEXT: store i32 %
// INLINE-NEXT: store i32 0
// INLINE: ret void
23 changes: 19 additions & 4 deletions clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE

// Make sure global variable for ctors exist for lib profile.
// CHECK:@llvm.global_ctors
Expand All @@ -11,13 +12,27 @@ void FirstEntry() {}

// CHECK: define void @FirstEntry()
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
// NOINLINE-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
// NOINLINE-NEXT: call void @"?FirstEntry@@YAXXZ"()
// Verify inlining leaves only calls to "llvm." intrinsics
// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
// CHECK: ret void

[shader("compute")]
[numthreads(1,1,1)]
void SecondEntry() {}

// CHECK: define void @SecondEntry()
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
// CHECK-NEXT: call void @"?SecondEntry@@YAXXZ"()
// NOINLINE-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
// NOINLINE-NEXT: call void @"?SecondEntry@@YAXXZ"()
// Verify inlining leaves only calls to "llvm." intrinsics
// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
// CHECK: ret void


// Verify the constructor is alwaysinline
// NOINLINE: ; Function Attrs: {{.*}}alwaysinline
// NOINLINE-NEXT: define internal void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() [[IntAttr:\#[0-9]+]]

// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
51 changes: 31 additions & 20 deletions clang/test/CodeGenHLSL/GlobalDestructors.hlsl
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,CHECK
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,CHECK
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,NOINLINE,CHECK
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,NOINLINE,CHECK
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK

// Make sure global variable for dtors exist for lib profile.
// Tests that constructors and destructors are appropriately generated for globals
// and that their calls are inlined when AlwaysInline is run
// but global variables are retained for the library profiles

// Make sure global variable for ctors/dtors exist for lib profile.
// LIB:@llvm.global_ctors
// LIB:@llvm.global_dtors
// Make sure global variable for dtors removed for compute profile.
// CS-NOT:llvm.global_dtors
// Make sure global variable for ctors/dtors removed for compute profile.
// CS-NOT:@llvm.global_ctors
// CS-NOT:@llvm.global_dtors

struct Tail {
Tail() {
Expand Down Expand Up @@ -46,22 +54,25 @@ void main(unsigned GI : SV_GroupIndex) {
Wag();
}

// Make sure global variable for ctors/dtors removed.
// CHECK-NOT:@llvm.global_ctors
// CHECK-NOT:@llvm.global_dtors
//CHECK: define void @main()
//CHECK-NEXT: entry:
//CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalDestructors.hlsl()
//CHECK-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
//CHECK-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
//CHECK-NEXT: call void @_GLOBAL__D_a()
//CHECK-NEXT: ret void
// CHECK: define void @main()
// CHECK-NEXT: entry:
// Verify destructor is emitted
// NOINLINE-NEXT: call void @_GLOBAL__sub_I_GlobalDestructors.hlsl()
// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
// NOINLINE-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
// NOINLINE-NEXT: call void @_GLOBAL__D_a()
// NOINLINE-NEXT: ret void
// Verify inlining leaves only calls to "llvm." intrinsics
// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
// INLINE: ret void

// This is really just a sanity check I needed for myself to verify that
// function scope static variables also get destroyed properly.

//CHECK: define internal void @_GLOBAL__D_a()
//CHECK-NEXT: entry:
//CHECK-NEXT: call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A")
//CHECK-NEXT: call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A")
//CHECK-NEXT: ret void
// NOINLINE: define internal void @_GLOBAL__D_a() [[IntAttr:\#[0-9]+]]
// NOINLINE-NEXT: entry:
// NOINLINE-NEXT: call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A")
// NOINLINE-NEXT: call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A")
// NOINLINE-NEXT: ret void

// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
1 change: 1 addition & 0 deletions clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=CHECK-SPIRV

RWBuffer<float> Buf;
Expand Down
5 changes: 3 additions & 2 deletions clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ void main(unsigned GI : SV_GroupIndex) {
// Even at -O0 the subscript operators get inlined. The -O0 IR is a bit messy
// and confusing to follow so the match here is pretty weak.

// CHECK: define internal void @"?main@@YAXI@Z"
// CHECK-NOT: call
// CHECK: define void @main()
// Verify inlining leaves only calls to "llvm." intrinsics
// CHECK-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
// CHECK: ret void
76 changes: 76 additions & 0 deletions clang/test/CodeGenHLSL/inline-constructors.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE

// Tests that implicit constructor calls for user classes will always be inlined.

struct Weed {
Weed() {Count += 1;}
[[maybe_unused]] void pull() {Count--;}
static int weedCount() { return Count; }
private:
static int Count;

} YardWeeds;

int Weed::Count = 1; // It begins. . .

struct Kitty {
unsigned burrsInFur;

Kitty() {
burrsInFur = 0;
}

void wanderInYard(int hours) {
burrsInFur = hours*Weed::weedCount()/8;
}

void lick() {
if(burrsInFur) {
burrsInFur--;
Weed w;
}
}

} Nion;

void NionsDay(int hours) {
static Kitty Nion;
Nion.wanderInYard(hours);
while(Nion.burrsInFur) Nion.lick();
}

// CHECK: define void @main()
// CHECK-NEXT: entry:
// Verify constructor is emitted
// NOINLINE-NEXT: call void @_GLOBAL__sub_I_inline_constructors.hlsl()
// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
// NOINLINE-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
// Verify inlining leaves only calls to "llvm." intrinsics
// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
// CHECK: ret void
[shader("compute")]
[numthreads(1,1,1)]
void main(unsigned GI : SV_GroupIndex) {
NionsDay(10);
}


// CHECK: define void @rainyMain()
// CHECK-NEXT: entry:
// Verify constructor is emitted
// NOINLINE-NEXT: call void @_GLOBAL__sub_I_inline_constructors.hlsl()
// NOINLINE-NEXT: call void @"?rainyMain@@YAXXZ"()
// Verify inlining leaves only calls to "llvm." intrinsics
// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
// CHECK: ret void
[shader("compute")]
[numthreads(1,1,1)]
void rainyMain() {
NionsDay(1);
}

Loading