forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[NVPTX] Fix internal indirect call prototypes not obeying the ABI (llvm#100131)
Summary: The NVPTX backend optimizes the ABI for functions that are internal; however, this is not legal for indirect call prototypes. Previously, we would modify the ABI on an aggregate byval type passed to an indirect call prototype, which would cause PTXAS to report an error. This patch simply passes nullptr in place of the function to the helper function, which forces strict ABI compliance without modification. Fixes llvm#100055 (cherry picked from commit e0649a5)
- Loading branch information
Showing
3 changed files
with
101 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_52 -mattr=+ptx64 | FileCheck %s | ||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_52 -mattr=+ptx64 | %ptxas-verify %} | ||
|
||
; Regression test for indirect calls that pass an aggregate byval argument | ||
; from a function with internal linkage. The expected output pins the | ||
; parameter layout used at the call site and in the emitted .callprototype. | ||
; The first lit line above pipes the llc output into FileCheck; the second | ||
; one is guarded by the ptxas condition and pipes the same output into the | ||
; ptxas verification substitution. | ||
target triple = "nvptx64-nvidia-cuda" | ||
|
||
; Aggregate types used as byval arguments below: S wraps a single i8 and | ||
; U wraps a single i64. | ||
%struct.S = type { i8 } | ||
%struct.U = type { i64 } | ||
|
||
; External global from which both test functions load the pointer that | ||
; they then call indirectly. | ||
@ptr = external global ptr, align 8 | ||
|
||
; foo: internal function that loads a pointer from @ptr and calls it | ||
; indirectly, passing a byval %struct.S (align 1) followed by a plain | ||
; pointer, and returns the i32 result of that call. | ||
; The autogenerated assertions expect the byval argument to be declared as a | ||
; 1-byte, align-1 .param array both for param0 and inside prototype_0. | ||
define internal i32 @foo() { | ||
; CHECK-LABEL: foo( | ||
; CHECK: { | ||
; CHECK-NEXT: .local .align 1 .b8 __local_depot0[2]; | ||
; CHECK-NEXT: .reg .b64 %SP; | ||
; CHECK-NEXT: .reg .b64 %SPL; | ||
; CHECK-NEXT: .reg .b16 %rs<2>; | ||
; CHECK-NEXT: .reg .b32 %r<3>; | ||
; CHECK-NEXT: .reg .b64 %rd<3>; | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: // %bb.0: // %entry | ||
; CHECK-NEXT: mov.u64 %SPL, __local_depot0; | ||
; CHECK-NEXT: cvta.local.u64 %SP, %SPL; | ||
; CHECK-NEXT: ld.global.u64 %rd1, [ptr]; | ||
; CHECK-NEXT: ld.u8 %rs1, [%SP+1]; | ||
; CHECK-NEXT: add.u64 %rd2, %SP, 0; | ||
; CHECK-NEXT: { // callseq 0, 0 | ||
; CHECK-NEXT: .param .align 1 .b8 param0[1]; | ||
; CHECK-NEXT: st.param.b8 [param0+0], %rs1; | ||
; CHECK-NEXT: .param .b64 param1; | ||
; CHECK-NEXT: st.param.b64 [param1+0], %rd2; | ||
; CHECK-NEXT: .param .b32 retval0; | ||
; CHECK-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .align 1 .b8 _[1], .param .b64 _); | ||
; CHECK-NEXT: call (retval0), | ||
; CHECK-NEXT: %rd1, | ||
; CHECK-NEXT: ( | ||
; CHECK-NEXT: param0, | ||
; CHECK-NEXT: param1 | ||
; CHECK-NEXT: ) | ||
; CHECK-NEXT: , prototype_0; | ||
; CHECK-NEXT: ld.param.b32 %r1, [retval0+0]; | ||
; CHECK-NEXT: } // callseq 0 | ||
; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; | ||
; CHECK-NEXT: ret; | ||
entry: | ||
; Two stack slots of type %struct.S: %s is handed over by address, while | ||
; %agg.tmp is the object passed byval. | ||
%s = alloca %struct.S, align 1 | ||
%agg.tmp = alloca %struct.S, align 1 | ||
; The callee is not named in the IR; it is whatever pointer @ptr holds. | ||
%0 = load ptr, ptr @ptr, align 8 | ||
; Indirect call through the loaded pointer with the byval aggregate first. | ||
%call = call i32 %0(ptr byval(%struct.S) align 1 %agg.tmp, ptr noundef %s) | ||
ret i32 %call | ||
} | ||
|
||
; bar: internal function that loads a pointer from @ptr and calls it | ||
; indirectly, passing a byval %struct.U (align 8) followed by a plain | ||
; pointer, and returns the i32 result of that call. | ||
; The autogenerated assertions expect the byval argument to be declared as | ||
; an 8-byte, align-8 .param array both for param0 and inside prototype_1. | ||
define internal i32 @bar() { | ||
; CHECK-LABEL: bar( | ||
; CHECK: // @bar | ||
; CHECK-NEXT: { | ||
; CHECK-NEXT: .local .align 8 .b8 __local_depot1[16]; | ||
; CHECK-NEXT: .reg .b64 %SP; | ||
; CHECK-NEXT: .reg .b64 %SPL; | ||
; CHECK-NEXT: .reg .b32 %r<3>; | ||
; CHECK-NEXT: .reg .b64 %rd<4>; | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: // %bb.0: // %entry | ||
; CHECK-NEXT: mov.u64 %SPL, __local_depot1; | ||
; CHECK-NEXT: cvta.local.u64 %SP, %SPL; | ||
; CHECK-NEXT: ld.global.u64 %rd1, [ptr]; | ||
; CHECK-NEXT: ld.u64 %rd2, [%SP+8]; | ||
; CHECK-NEXT: add.u64 %rd3, %SP, 0; | ||
; CHECK-NEXT: { // callseq 1, 0 | ||
; CHECK-NEXT: .param .align 8 .b8 param0[8]; | ||
; CHECK-NEXT: st.param.b64 [param0+0], %rd2; | ||
; CHECK-NEXT: .param .b64 param1; | ||
; CHECK-NEXT: st.param.b64 [param1+0], %rd3; | ||
; CHECK-NEXT: .param .b32 retval0; | ||
; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .align 8 .b8 _[8], .param .b64 _); | ||
; CHECK-NEXT: call (retval0), | ||
; CHECK-NEXT: %rd1, | ||
; CHECK-NEXT: ( | ||
; CHECK-NEXT: param0, | ||
; CHECK-NEXT: param1 | ||
; CHECK-NEXT: ) | ||
; CHECK-NEXT: , prototype_1; | ||
; CHECK-NEXT: ld.param.b32 %r1, [retval0+0]; | ||
; CHECK-NEXT: } // callseq 1 | ||
; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; | ||
; CHECK-NEXT: ret; | ||
entry: | ||
; Two stack slots of type %struct.U: %s is handed over by address, while | ||
; %agg.tmp is the object passed byval. | ||
%s = alloca %struct.U, align 8 | ||
%agg.tmp = alloca %struct.U, align 8 | ||
; The callee is not named in the IR; it is whatever pointer @ptr holds. | ||
%0 = load ptr, ptr @ptr, align 8 | ||
; Indirect call through the loaded pointer with the byval aggregate first; | ||
; here noundef is on the return value rather than on the second argument. | ||
%call = call noundef i32 %0(ptr byval(%struct.U) align 8 %agg.tmp, ptr %s) | ||
ret i32 %call | ||
} |