Skip to content

Commit

Permalink
Reapply "[HLSL] set alwaysinline on HLSL functions (llvm#106588)"
Browse files Browse the repository at this point in the history
This reverts commit 4a63f4d.

It was reverted because of a buildbot breakage, but the fix-forward has
landed (llvm#109023).
  • Loading branch information
thurstond authored and hamphet committed Sep 18, 2024
1 parent c640d15 commit a1f59f2
Show file tree
Hide file tree
Showing 9 changed files with 282 additions and 42 deletions.
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CGHLSLRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes(
NumThreadsAttr->getZ());
Fn->addFnAttr(NumThreadsKindStr, NumThreadsStr);
}
Fn->addFnAttr(llvm::Attribute::NoInline);
}

static Value *buildVectorInput(IRBuilder<> &B, Function *F, llvm::Type *Ty) {
Expand Down
20 changes: 14 additions & 6 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2473,11 +2473,14 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
B.addAttribute(llvm::Attribute::StackProtectReq);

if (!D) {
// Non-entry HLSL functions must always be inlined.
if (getLangOpts().HLSL && !F->hasFnAttribute(llvm::Attribute::NoInline))
B.addAttribute(llvm::Attribute::AlwaysInline);
// If we don't have a declaration to control inlining, the function isn't
// explicitly marked as alwaysinline for semantic reasons, and inlining is
// disabled, mark the function as noinline.
if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) &&
CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining)
else if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) &&
CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining)
B.addAttribute(llvm::Attribute::NoInline);

F->addFnAttrs(B);
Expand All @@ -2504,9 +2507,13 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
ShouldAddOptNone &= !D->hasAttr<MinSizeAttr>();
ShouldAddOptNone &= !D->hasAttr<AlwaysInlineAttr>();

// Add optnone, but do so only if the function isn't always_inline.
if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&
!F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
// Non-entry HLSL functions must always be inlined.
if (getLangOpts().HLSL && !F->hasFnAttribute(llvm::Attribute::NoInline) &&
!D->hasAttr<NoInlineAttr>()) {
B.addAttribute(llvm::Attribute::AlwaysInline);
} else if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&
!F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
// Add optnone, but do so only if the function isn't always_inline.
B.addAttribute(llvm::Attribute::OptimizeNone);

// OptimizeNone implies noinline; we should not be inlining such functions.
Expand All @@ -2526,7 +2533,8 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
B.addAttribute(llvm::Attribute::NoInline);
} else if (D->hasAttr<NoDuplicateAttr>()) {
B.addAttribute(llvm::Attribute::NoDuplicate);
} else if (D->hasAttr<NoInlineAttr>() && !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
} else if (D->hasAttr<NoInlineAttr>() &&
!F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
// Add noinline if the function isn't always_inline.
B.addAttribute(llvm::Attribute::NoInline);
} else if (D->hasAttr<AlwaysInlineAttr>() &&
Expand Down
31 changes: 21 additions & 10 deletions clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE

int i;

Expand All @@ -7,7 +8,7 @@ __attribute__((constructor)) void call_me_first(void) {
}

__attribute__((constructor)) void then_call_me(void) {
i = 12;
i = 13;
}

__attribute__((destructor)) void call_me_last(void) {
Expand All @@ -21,11 +22,21 @@ void main(unsigned GI : SV_GroupIndex) {}
// CHECK-NOT:@llvm.global_ctors
// CHECK-NOT:@llvm.global_dtors

//CHECK: define void @main()
//CHECK-NEXT: entry:
//CHECK-NEXT: call void @"?call_me_first@@YAXXZ"()
//CHECK-NEXT: call void @"?then_call_me@@YAXXZ"()
//CHECK-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
//CHECK-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
//CHECK-NEXT: call void @"?call_me_last@@YAXXZ"(
//CHECK-NEXT: ret void
// CHECK: define void @main()
// CHECK-NEXT: entry:
// Verify function constructors are emitted
// NOINLINE-NEXT: call void @"?call_me_first@@YAXXZ"()
// NOINLINE-NEXT: call void @"?then_call_me@@YAXXZ"()
// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
// NOINLINE-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
// NOINLINE-NEXT: call void @"?call_me_last@@YAXXZ"(
// NOINLINE-NEXT: ret void

// Verify constructor calls are inlined when AlwaysInline is run
// INLINE-NEXT: alloca
// INLINE-NEXT: store i32 12
// INLINE-NEXT: store i32 13
// INLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
// INLINE-NEXT: store i32 %
// INLINE-NEXT: store i32 0
// INLINE: ret void
23 changes: 19 additions & 4 deletions clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE

// Make sure global variable for ctors exist for lib profile.
// CHECK:@llvm.global_ctors
Expand All @@ -11,13 +12,27 @@ void FirstEntry() {}

// CHECK: define void @FirstEntry()
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
// NOINLINE-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
// NOINLINE-NEXT: call void @"?FirstEntry@@YAXXZ"()
// Verify inlining leaves only calls to "llvm." intrinsics
// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
// CHECK: ret void

[shader("compute")]
[numthreads(1,1,1)]
void SecondEntry() {}

// CHECK: define void @SecondEntry()
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
// CHECK-NEXT: call void @"?SecondEntry@@YAXXZ"()
// NOINLINE-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
// NOINLINE-NEXT: call void @"?SecondEntry@@YAXXZ"()
// Verify inlining leaves only calls to "llvm." intrinsics
// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
// CHECK: ret void


// Verify the constructor is alwaysinline
// NOINLINE: ; Function Attrs: {{.*}}alwaysinline
// NOINLINE-NEXT: define internal void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() [[IntAttr:\#[0-9]+]]

// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
51 changes: 31 additions & 20 deletions clang/test/CodeGenHLSL/GlobalDestructors.hlsl
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,CHECK
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,CHECK
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,NOINLINE,CHECK
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,NOINLINE,CHECK
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK

// Make sure global variable for dtors exist for lib profile.
// Tests that constructors and destructors are appropriately generated for globals
// and that their calls are inlined when AlwaysInline is run
// but global variables are retained for the library profiles

// Make sure global variable for ctors/dtors exist for lib profile.
// LIB:@llvm.global_ctors
// LIB:@llvm.global_dtors
// Make sure global variable for dtors removed for compute profile.
// CS-NOT:llvm.global_dtors
// Make sure global variable for ctors/dtors removed for compute profile.
// CS-NOT:@llvm.global_ctors
// CS-NOT:@llvm.global_dtors

struct Tail {
Tail() {
Expand Down Expand Up @@ -46,22 +54,25 @@ void main(unsigned GI : SV_GroupIndex) {
Wag();
}

// Make sure global variable for ctors/dtors removed.
// CHECK-NOT:@llvm.global_ctors
// CHECK-NOT:@llvm.global_dtors
//CHECK: define void @main()
//CHECK-NEXT: entry:
//CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalDestructors.hlsl()
//CHECK-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
//CHECK-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
//CHECK-NEXT: call void @_GLOBAL__D_a()
//CHECK-NEXT: ret void
// CHECK: define void @main()
// CHECK-NEXT: entry:
// Verify destructor is emitted
// NOINLINE-NEXT: call void @_GLOBAL__sub_I_GlobalDestructors.hlsl()
// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
// NOINLINE-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
// NOINLINE-NEXT: call void @_GLOBAL__D_a()
// NOINLINE-NEXT: ret void
// Verify inlining leaves only calls to "llvm." intrinsics
// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
// INLINE: ret void

// This is really just a sanity check I needed for myself to verify that
// function scope static variables also get destroyed properly.

//CHECK: define internal void @_GLOBAL__D_a()
//CHECK-NEXT: entry:
//CHECK-NEXT: call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A")
//CHECK-NEXT: call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A")
//CHECK-NEXT: ret void
// NOINLINE: define internal void @_GLOBAL__D_a() [[IntAttr:\#[0-9]+]]
// NOINLINE-NEXT: entry:
// NOINLINE-NEXT: call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A")
// NOINLINE-NEXT: call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A")
// NOINLINE-NEXT: ret void

// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
1 change: 1 addition & 0 deletions clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=CHECK-SPIRV

RWBuffer<float> Buf;
Expand Down
5 changes: 3 additions & 2 deletions clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ void main(unsigned GI : SV_GroupIndex) {
// Even at -O0 the subscript operators get inlined. The -O0 IR is a bit messy
// and confusing to follow so the match here is pretty weak.

// CHECK: define internal void @"?main@@YAXI@Z"
// CHECK-NOT: call
// CHECK: define void @main()
// Verify inlining leaves only calls to "llvm." intrinsics
// CHECK-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
// CHECK: ret void
76 changes: 76 additions & 0 deletions clang/test/CodeGenHLSL/inline-constructors.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE

// Tests that implicit constructor calls for user classes will always be inlined.

struct Weed {
Weed() {Count += 1;}
[[maybe_unused]] void pull() {Count--;}
static int weedCount() { return Count; }
private:
static int Count;

} YardWeeds;

int Weed::Count = 1; // It begins. . .

struct Kitty {
unsigned burrsInFur;

Kitty() {
burrsInFur = 0;
}

void wanderInYard(int hours) {
burrsInFur = hours*Weed::weedCount()/8;
}

void lick() {
if(burrsInFur) {
burrsInFur--;
Weed w;
}
}

} Nion;

void NionsDay(int hours) {
static Kitty Nion;
Nion.wanderInYard(hours);
while(Nion.burrsInFur) Nion.lick();
}

// CHECK: define void @main()
// CHECK-NEXT: entry:
// Verify constructor is emitted
// NOINLINE-NEXT: call void @_GLOBAL__sub_I_inline_constructors.hlsl()
// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
// NOINLINE-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
// Verify inlining leaves only calls to "llvm." intrinsics
// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
// CHECK: ret void
[shader("compute")]
[numthreads(1,1,1)]
void main(unsigned GI : SV_GroupIndex) {
NionsDay(10);
}


// CHECK: define void @rainyMain()
// CHECK-NEXT: entry:
// Verify constructor is emitted
// NOINLINE-NEXT: call void @_GLOBAL__sub_I_inline_constructors.hlsl()
// NOINLINE-NEXT: call void @"?rainyMain@@YAXXZ"()
// Verify inlining leaves only calls to "llvm." intrinsics
// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
// CHECK: ret void
[shader("compute")]
[numthreads(1,1,1)]
void rainyMain() {
NionsDay(1);
}

Loading

0 comments on commit a1f59f2

Please sign in to comment.