From 00179e92c147e16de1f7c653f88c8805aef820c1 Mon Sep 17 00:00:00 2001 From: Keith Smiley Date: Wed, 15 May 2024 15:23:29 -0700 Subject: [PATCH 01/16] [bazel] Add new dependencies (#92323) This also fixes building ... on Linux. Seems like target_compatible_with isn't enough but you also need a manual tag. --- utils/bazel/llvm-project-overlay/lldb/BUILD.bazel | 5 ++++- utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel b/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel index c6fc4e08aa7271..ddcaea5184d434 100644 --- a/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel @@ -916,7 +916,10 @@ apple_genrule( srcs = [":debugserver_unsigned"], outs = ["debugserver"], cmd = "cp $(SRCS) $(OUTS) && xcrun codesign -f -s - --entitlements $(location tools/debugserver/resources/debugserver-macosx-entitlements.plist) $(OUTS)", - tags = ["nobuildkite"], + tags = [ + "manual", + "nobuildkite", + ], target_compatible_with = select({ "@platforms//os:macos": [], "//conditions:default": ["@platforms//:incompatible"], diff --git a/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel index 21f0c7092f32e9..b44489e213a400 100644 --- a/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel @@ -617,6 +617,7 @@ cc_test( allow_empty = False, ), deps = [ + "//llvm:BitstreamReader", "//llvm:Core", "//llvm:Coverage", "//llvm:DebugInfo", From 050593fc4f9a7f2b9450ee093c4638b8539315b7 Mon Sep 17 00:00:00 2001 From: Andrey Ali Khan Bolshakov Date: Thu, 16 May 2024 01:39:12 +0300 Subject: [PATCH 02/16] [Coverage] Handle `CoroutineSuspendExpr` correctly (#88898) This avoids visiting `co_await` or `co_yield` operand 5 times (it is repeated under transformed awaiter subexpression, and under `await_ready`, `await_suspend`, and `await_resume` generated call subexpressions). --- clang/lib/CodeGen/CoverageMappingGen.cpp | 4 ++++ clang/test/CoverageMapping/coroutine.cpp | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index ce2f39aeb08213..e46560029ab08d 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -1439,6 +1439,10 @@ struct CounterCoverageMappingBuilder terminateRegion(S); } + void VisitCoroutineSuspendExpr(const CoroutineSuspendExpr *E) { + Visit(E->getOperand()); + } + void VisitCXXThrowExpr(const CXXThrowExpr *E) { extendRegion(E); if (E->getSubExpr()) diff --git a/clang/test/CoverageMapping/coroutine.cpp b/clang/test/CoverageMapping/coroutine.cpp index 0105005d198a1c..d322bc351a7276 100644 --- a/clang/test/CoverageMapping/coroutine.cpp +++ b/clang/test/CoverageMapping/coroutine.cpp @@ -32,6 +32,7 @@ struct std::coroutine_traits { suspend_always final_suspend() noexcept; void unhandled_exception() noexcept; void return_value(int); + suspend_always yield_value(int); }; }; @@ -45,3 +46,21 @@ int f1(int x) { // CHECK-NEXT: File 0, [[@LINE]]:15 -> [[@LINE+8]]:2 = #0 } // CHECK-NEXT: File 0, [[@LINE-2]]:10 -> [[@LINE]]:4 = (#0 - #1) co_return x; // CHECK-NEXT: Gap,File 0, [[@LINE-1]]:4 -> [[@LINE]]:3 = #1 } // CHECK-NEXT: File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:14 = #1 + +// CHECK-LABEL: _Z2f2i: +// CHECK-NEXT: File 0, [[@LINE+1]]:15 -> [[@LINE+15]]:2 = #0 +int f2(int x) { +// CHECK-NEXT: File 0, [[@LINE+5]]:13 -> [[@LINE+5]]:18 = #0 +// CHECK-NEXT: Branch,File 0, [[@LINE+4]]:13 -> [[@LINE+4]]:18 = #1, (#0 - #1) +// CHECK-NEXT: Gap,File 0, [[@LINE+3]]:20 -> [[@LINE+3]]:21 = #1 +// CHECK-NEXT: File 0, [[@LINE+2]]:21 -> [[@LINE+2]]:37 = #1 +// CHECK-NEXT: File 0, [[@LINE+1]]:40 -> [[@LINE+1]]:56 = (#0 - #1) + co_await (x > 0 ? suspend_always{} : suspend_always{}); +// CHECK-NEXT: File 0, [[@LINE+5]]:12 -> [[@LINE+5]]:17 = #0 +// CHECK-NEXT: Branch,File 0, [[@LINE+4]]:12 -> [[@LINE+4]]:17 = #2, (#0 - #2) +// CHECK-NEXT: Gap,File 0, [[@LINE+3]]:19 -> [[@LINE+3]]:20 = #2 +// CHECK-NEXT: File 0, [[@LINE+2]]:20 -> [[@LINE+2]]:21 = #2 +// CHECK-NEXT: File 0, [[@LINE+1]]:24 -> [[@LINE+1]]:25 = (#0 - #2) + co_yield x > 0 ? 1 : 2; + co_return 0; +} From 5ff6c6542ac451daaed6c417e481e313165d3454 Mon Sep 17 00:00:00 2001 From: Andrey Ali Khan Bolshakov Date: Thu, 16 May 2024 01:40:03 +0300 Subject: [PATCH 03/16] [Coverage] Handle array decomposition correctly (#88881) `ArrayInitLoopExpr` AST node has two occurences of its as-written initializing expression in its subexpressions through a non-unique `OpaqueValueExpr`. It causes double-visiting of the initializing expression if not handled explicitly, as discussed in #85837. --- clang/lib/CodeGen/CoverageMappingGen.cpp | 4 ++++ clang/test/CoverageMapping/decomposition.cpp | 15 +++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 clang/test/CoverageMapping/decomposition.cpp diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index e46560029ab08d..cc8ab7a5b4369e 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -2177,6 +2177,10 @@ struct CounterCoverageMappingBuilder // propagate counts into them. } + void VisitArrayInitLoopExpr(const ArrayInitLoopExpr *AILE) { + Visit(AILE->getCommonExpr()->getSourceExpr()); + } + void VisitPseudoObjectExpr(const PseudoObjectExpr *POE) { // Just visit syntatic expression as this is what users actually write. VisitStmt(POE->getSyntacticForm()); diff --git a/clang/test/CoverageMapping/decomposition.cpp b/clang/test/CoverageMapping/decomposition.cpp new file mode 100644 index 00000000000000..601ea630faeec9 --- /dev/null +++ b/clang/test/CoverageMapping/decomposition.cpp @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -mllvm -emptyline-comment-coverage=false -triple %itanium_abi_triple -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only %s | FileCheck %s + +// CHECK-LABEL: _Z19array_decompositioni: +// CHECK-NEXT: File 0, [[@LINE+6]]:32 -> {{[0-9]+}}:2 = #0 +// CHECK-NEXT: File 0, [[@LINE+8]]:20 -> [[@LINE+8]]:25 = #0 +// CHECK-NEXT: Branch,File 0, [[@LINE+7]]:20 -> [[@LINE+7]]:25 = #1, (#0 - #1) +// CHECK-NEXT: Gap,File 0, [[@LINE+6]]:27 -> [[@LINE+6]]:28 = #1 +// CHECK-NEXT: File 0, [[@LINE+5]]:28 -> [[@LINE+5]]:29 = #1 +// CHECK-NEXT: File 0, [[@LINE+4]]:32 -> [[@LINE+4]]:33 = (#0 - #1) +int array_decomposition(int i) { + int a[] = {1, 2, 3}; + int b[] = {4, 5, 6}; + auto [x, y, z] = i > 0 ? a : b; + return x + y + z; +} From aa889d7783af050ce5d19af67c7225ee119d625e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 15 May 2024 15:41:20 -0700 Subject: [PATCH 04/16] [ELF,test] Fix FileCheck prefixes Most violations are stale and should be removed while a few can be adjusted. Reported at #92238 --- lld/test/ELF/arm-exidx-shared.s | 2 +- lld/test/ELF/mips-tls-hilo.s | 10 ---------- lld/test/ELF/ppc32-reloc-rel.s | 3 ++- lld/test/ELF/ppc64-pcrel-call-to-extern.s | 5 ----- lld/test/ELF/ppc64-toc-relax-ifunc.s | 13 +++++-------- lld/test/ELF/riscv-gp.s | 4 ---- 6 files changed, 8 insertions(+), 29 deletions(-) diff --git a/lld/test/ELF/arm-exidx-shared.s b/lld/test/ELF/arm-exidx-shared.s index fce605d6d96aa5..2e484e5c065fee 100644 --- a/lld/test/ELF/arm-exidx-shared.s +++ b/lld/test/ELF/arm-exidx-shared.s @@ -2,7 +2,7 @@ // RUN: llvm-mc -filetype=obj -arm-add-build-attributes -triple=armv7a-none-linux-gnueabi %s -o %t // RUN: ld.lld --hash-style=sysv %t --shared -o %t2 // RUN: llvm-readobj --relocations %t2 | FileCheck %s -// RUN: llvm-objdump -s --triple=armv7a-none-linux-gnueabi %t2 | FileCheck --check-prefix=CHECK-EXTAB-NEXT %s +// RUN: llvm-objdump -s --triple=armv7a-none-linux-gnueabi %t2 | FileCheck --check-prefix=CHECK-EXTAB %s // Check that the relative R_ARM_PREL31 relocation can access a PLT entry // for when the personality routine is referenced from a shared library. diff --git a/lld/test/ELF/mips-tls-hilo.s b/lld/test/ELF/mips-tls-hilo.s index 6fd2033aac4142..9c67f9fe14ba6f 100644 --- a/lld/test/ELF/mips-tls-hilo.s +++ b/lld/test/ELF/mips-tls-hilo.s @@ -28,16 +28,6 @@ # CHECK-NEXT: ] # CHECK-NOT: Primary GOT -# SO: Relocations [ -# SO-NEXT: ] -# SO: Primary GOT { -# SO: Local entries [ -# SO-NEXT: ] -# SO-NEXT: Global entries [ -# SO-NEXT: ] -# SO-NEXT: Number of TLS and multi-GOT entries: 0 -# SO-NEXT: } - .text .globl __start .type __start,@function diff --git a/lld/test/ELF/ppc32-reloc-rel.s b/lld/test/ELF/ppc32-reloc-rel.s index b89e0b43cb785c..d13ebdb7997f7c 100644 --- a/lld/test/ELF/ppc32-reloc-rel.s +++ b/lld/test/ELF/ppc32-reloc-rel.s @@ -6,6 +6,7 @@ # RUN: llvm-mc -filetype=obj -triple=powerpcle %s -o %t.le.o # RUN: ld.lld %t.le.o -o %t # RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s +# RUN: llvm-objdump -s %t | FileCheck %s --check-prefix=HEX .section .R_PPC_REL14,"ax",@progbits beq 1f @@ -23,7 +24,7 @@ .long 1f - . 1: # HEX-LABEL: section .R_PPC_REL32: -# HEX-NEXT: 10010008 00000004 +# HEX-NEXT: 04000000 .section .R_PPC_PLTREL24,"ax",@progbits b 1f@PLT+32768 diff --git a/lld/test/ELF/ppc64-pcrel-call-to-extern.s b/lld/test/ELF/ppc64-pcrel-call-to-extern.s index e5846e80ce2382..de05b733e175a2 100644 --- a/lld/test/ELF/ppc64-pcrel-call-to-extern.s +++ b/lld/test/ELF/ppc64-pcrel-call-to-extern.s @@ -73,9 +73,7 @@ ## DT_PLTGOT points to .plt # SEC: .plt NOBITS 0000000010030168 040168 000028 00 WA 0 0 8 -# SEC-OG: .plt NOBITS 0000000010030158 040158 000028 00 WA 0 0 8 # SEC: 0x0000000000000003 (PLTGOT) 0x10030168 -# SEC-OG: 0x0000000000000003 (PLTGOT) 0x10030168 ## DT_PLTGOT points to .plt # SEC-NOP10: .plt NOBITS 0000000010030168 040168 000028 00 WA 0 0 8 @@ -86,11 +84,8 @@ ## Check that we emit 3 R_PPC64_JMP_SLOT in .rela.plt. # REL: .rela.plt { # REL-NEXT: 0x10030178 R_PPC64_JMP_SLOT callee_global_stother0 0x0 -# REL-NEXT-OG: 0x10030168 R_PPC64_JMP_SLOT callee_global_stother0 0x0 # REL-NEXT: 0x10030180 R_PPC64_JMP_SLOT callee_global_stother1 0x0 -# REL-NEXT-OG: 0x10030170 R_PPC64_JMP_SLOT callee_global_stother1 0x0 # REL-NEXT: 0x10030188 R_PPC64_JMP_SLOT callee_global_TOC 0x0 -# REL-NEXT-OG: 0x10030178 R_PPC64_JMP_SLOT callee_global_TOC 0x0 # REL-NEXT: } # REL-NOP10: .rela.plt { diff --git a/lld/test/ELF/ppc64-toc-relax-ifunc.s b/lld/test/ELF/ppc64-toc-relax-ifunc.s index 9fb1bf0023b6d4..00a63c7e5b6772 100644 --- a/lld/test/ELF/ppc64-toc-relax-ifunc.s +++ b/lld/test/ELF/ppc64-toc-relax-ifunc.s @@ -4,7 +4,7 @@ # RUN: echo '.globl ifunc; .type ifunc, %gnu_indirect_function; ifunc:' | \ # RUN: llvm-mc -filetype=obj -triple=powerpc64le - -o %t1.o # RUN: ld.lld %t.o %t1.o -o %t -# RUN: llvm-readelf -S -s %t | FileCheck --check-prefix=SEC %s +# RUN: llvm-readelf -Ssr %t | FileCheck --check-prefix=SEC %s # RUN: llvm-readelf -x .toc %t | FileCheck --check-prefix=HEX %s # RUN: llvm-objdump -d %t | FileCheck --check-prefix=DIS %s @@ -13,18 +13,15 @@ ## still perform toc-indirect to toc-relative relaxation because the distance ## to the address of the canonical PLT is fixed. -# SEC: .text PROGBITS 00000000100101e0 -# SEC: .plt NOBITS 0000000010030200 -# SEC: 00000000100101e8 0 FUNC GLOBAL DEFAULT 3 ifunc +# SEC: .text PROGBITS [[#%x,TEXT:]] +# SEC: .plt NOBITS [[#%x,PLT:]] +# SEC: {{0*}}[[#PLT]] {{.+}} R_PPC64_IRELATIVE [[#TEXT+8]] +# SEC: {{0*}}[[#TEXT+8]] 0 FUNC GLOBAL DEFAULT 3 ifunc ## .toc[0] stores the address of the canonical PLT. # HEX: section '.toc': # HEX-NEXT: 0x100201f8 e8010110 00000000 -# REL: .rela.dyn { -# REL-NEXT: 0x100301f8 R_PPC64_IRELATIVE - 0x100101e8 -# REL-NEXT: } - # DIS: addi 3, 3, addis 3, 2, .toc@toc@ha diff --git a/lld/test/ELF/riscv-gp.s b/lld/test/ELF/riscv-gp.s index 29411d19b01929..e82e36ee9a7ae5 100644 --- a/lld/test/ELF/riscv-gp.s +++ b/lld/test/ELF/riscv-gp.s @@ -16,10 +16,6 @@ # SEC64: [ [[#SDATA:]]] .sdata PROGBITS {{0*}}000032e0 # SEC64: {{0*}}00003ae0 0 NOTYPE GLOBAL DEFAULT [[#SDATA]] __global_pointer$ -## __global_pointer$ - 0x1000 = 4096*3-2048 -# DIS: 1000: auipc gp, 3 -# DIS-NEXT: addi gp, gp, -2048 - # ERR: error: relocation R_RISCV_PCREL_HI20 cannot be used against symbol '__global_pointer$'; recompile with -fPIC ## -r mode does not define __global_pointer$. From 0585eed9409c1362f7deaabc42c1d3c3f55c4b6c Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Wed, 15 May 2024 15:44:05 -0700 Subject: [PATCH 05/16] [lldb-dap] Support publishing to the VSCode market place (#92320) Update the publisher and add a publish script that we can use from Github actions. --- lldb/tools/lldb-dap/package.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lldb/tools/lldb-dap/package.json b/lldb/tools/lldb-dap/package.json index 2e8ad074256bf5..aeb24445551c1e 100644 --- a/lldb/tools/lldb-dap/package.json +++ b/lldb/tools/lldb-dap/package.json @@ -2,7 +2,7 @@ "name": "lldb-dap", "displayName": "LLDB DAP", "version": "0.2.0", - "publisher": "llvm", + "publisher": "llvm-vs-code-extensions", "homepage": "https://lldb.llvm.org", "description": "LLDB debugging from VSCode", "license": "Apache 2.0 License with LLVM exceptions", @@ -42,6 +42,7 @@ "watch": "tsc -watch -p ./", "format": "npx prettier './src-ts/' --write", "package": "vsce package --out ./out/lldb-dap.vsix", + "publish": "vsce publish", "vscode-uninstall": "code --uninstall-extension llvm.lldb-dap", "vscode-install": "code --install-extension ./out/lldb-dap.vsix" }, From e00a3ccf43563209b71c5b68f56d83f4052dca63 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Wed, 15 May 2024 15:44:37 -0700 Subject: [PATCH 06/16] [flang] New -fdebug-unparse-with-modules option (#91660) This option is a compilation action that parses a source file and performs semantic analysis on it, like the existing -fdebug-unparse option does. Its output, however, is preceded by the effective contents of all of the non-intrinsic modules on which it depends but does not define, transitively preceded by the closure of all of those modules' dependencies. The output from this option is therefore the analyzed parse tree for a source file encapsulated with all of its non-intrinsic module dependencies. This output may be useful for extracting code from large applications for use as an attachment to a bug report, or as input to a test case reduction tool for problem isolation. --- clang/include/clang/Driver/Options.td | 4 +- .../include/flang/Frontend/FrontendActions.h | 4 ++ .../include/flang/Frontend/FrontendOptions.h | 4 ++ .../flang/Semantics/unparse-with-symbols.h | 4 ++ flang/lib/Frontend/CompilerInvocation.cpp | 3 ++ flang/lib/Frontend/FrontendActions.cpp | 9 +++++ .../ExecuteCompilerInvocation.cpp | 2 + flang/lib/Semantics/mod-file.cpp | 20 +++++++++- flang/lib/Semantics/mod-file.h | 3 ++ flang/lib/Semantics/unparse-with-symbols.cpp | 38 +++++++++++++++++++ flang/test/Driver/unparse-with-modules.f90 | 34 +++++++++++++++++ 11 files changed, 122 insertions(+), 3 deletions(-) create mode 100644 flang/test/Driver/unparse-with-modules.f90 diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index c54eb543d6580e..e579f1a0a3665c 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6647,7 +6647,9 @@ def fdebug_unparse : Flag<["-"], "fdebug-unparse">, Group, DocBrief<[{Run the parser and the semantic checks. Then unparse the parse-tree and output the generated Fortran source file.}]>; def fdebug_unparse_with_symbols : Flag<["-"], "fdebug-unparse-with-symbols">, Group, - HelpText<"Unparse and stop.">; + HelpText<"Unparse with symbols and stop.">; +def fdebug_unparse_with_modules : Flag<["-"], "fdebug-unparse-with-modules">, Group, + HelpText<"Unparse with dependent modules and stop.">; def fdebug_dump_symbols : Flag<["-"], "fdebug-dump-symbols">, Group, HelpText<"Dump symbols after the semantic analysis">; def fdebug_dump_parse_tree : Flag<["-"], "fdebug-dump-parse-tree">, Group, diff --git a/flang/include/flang/Frontend/FrontendActions.h b/flang/include/flang/Frontend/FrontendActions.h index e2e859f3a81bd7..7823565eb815f8 100644 --- a/flang/include/flang/Frontend/FrontendActions.h +++ b/flang/include/flang/Frontend/FrontendActions.h @@ -108,6 +108,10 @@ class DebugUnparseWithSymbolsAction : public PrescanAndSemaAction { void executeAction() override; }; +class DebugUnparseWithModulesAction : public PrescanAndSemaAction { + void executeAction() override; +}; + class DebugUnparseAction : public PrescanAndSemaAction { void executeAction() override; }; diff --git a/flang/include/flang/Frontend/FrontendOptions.h b/flang/include/flang/Frontend/FrontendOptions.h index 06b1318f243b08..82ca99672ec610 100644 --- a/flang/include/flang/Frontend/FrontendOptions.h +++ b/flang/include/flang/Frontend/FrontendOptions.h @@ -63,6 +63,10 @@ enum ActionKind { /// Fortran source file DebugUnparseWithSymbols, + /// Parse, run semantics, and output a Fortran source file preceded + /// by all the necessary modules (transitively) + DebugUnparseWithModules, + /// Parse, run semantics and then output symbols from semantics DebugDumpSymbols, diff --git a/flang/include/flang/Semantics/unparse-with-symbols.h b/flang/include/flang/Semantics/unparse-with-symbols.h index d70110245e2b2f..5e18b3fc3063db 100644 --- a/flang/include/flang/Semantics/unparse-with-symbols.h +++ b/flang/include/flang/Semantics/unparse-with-symbols.h @@ -21,8 +21,12 @@ struct Program; } namespace Fortran::semantics { +class SemanticsContext; void UnparseWithSymbols(llvm::raw_ostream &, const parser::Program &, parser::Encoding encoding = parser::Encoding::UTF_8); +void UnparseWithModules(llvm::raw_ostream &, SemanticsContext &, + const parser::Program &, + parser::Encoding encoding = parser::Encoding::UTF_8); } #endif // FORTRAN_SEMANTICS_UNPARSE_WITH_SYMBOLS_H_ diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index db7fd3cccc7a28..e8a8c90045d92d 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -488,6 +488,9 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, case clang::driver::options::OPT_fdebug_unparse_with_symbols: opts.programAction = DebugUnparseWithSymbols; break; + case clang::driver::options::OPT_fdebug_unparse_with_modules: + opts.programAction = DebugUnparseWithModules; + break; case clang::driver::options::OPT_fdebug_dump_symbols: opts.programAction = DebugDumpSymbols; break; diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 2f65ab6102f4d9..4341c104a69df2 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -477,6 +477,15 @@ void DebugUnparseWithSymbolsAction::executeAction() { reportFatalSemanticErrors(); } +void DebugUnparseWithModulesAction::executeAction() { + auto &parseTree{*getInstance().getParsing().parseTree()}; + CompilerInstance &ci{getInstance()}; + Fortran::semantics::UnparseWithModules( + llvm::outs(), ci.getSemantics().context(), parseTree, + /*encoding=*/Fortran::parser::Encoding::UTF_8); + reportFatalSemanticErrors(); +} + void DebugDumpSymbolsAction::executeAction() { CompilerInstance &ci = this->getInstance(); diff --git a/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp index 4cad640562c619..e2cbd5112d6ea5 100644 --- a/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -59,6 +59,8 @@ createFrontendAction(CompilerInstance &ci) { return std::make_unique(); case DebugUnparseWithSymbols: return std::make_unique(); + case DebugUnparseWithModules: + return std::make_unique(); case DebugDumpSymbols: return std::make_unique(); case DebugDumpParseTree: diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp index e9aebe5b08f2ba..bb8c6c7567b8d7 100644 --- a/flang/lib/Semantics/mod-file.cpp +++ b/flang/lib/Semantics/mod-file.cpp @@ -132,11 +132,11 @@ static std::string ModFileName(const SourceName &name, // Write the module file for symbol, which must be a module or submodule. void ModFileWriter::Write(const Symbol &symbol) { - auto &module{symbol.get()}; + const auto &module{symbol.get()}; if (module.moduleFileHash()) { return; // already written } - auto *ancestor{module.ancestor()}; + const auto *ancestor{module.ancestor()}; isSubmodule_ = ancestor != nullptr; auto ancestorName{ancestor ? ancestor->GetName().value().ToString() : ""s}; auto path{context_.moduleDirectory() + '/' + @@ -151,6 +151,21 @@ void ModFileWriter::Write(const Symbol &symbol) { const_cast(module).set_moduleFileHash(checkSum); } +void ModFileWriter::WriteClosure(llvm::raw_ostream &out, const Symbol &symbol, + UnorderedSymbolSet &nonIntrinsicModulesWritten) { + if (!symbol.has() || symbol.owner().IsIntrinsicModules() || + !nonIntrinsicModulesWritten.insert(symbol).second) { + return; + } + PutSymbols(DEREF(symbol.scope())); + needsBuf_.clear(); // omit module checksums + auto str{GetAsString(symbol)}; + for (auto depRef : std::move(usedNonIntrinsicModules_)) { + WriteClosure(out, *depRef, nonIntrinsicModulesWritten); + } + out << std::move(str); +} + // Return the entire body of the module file // and clear saved uses, decls, and contains. std::string ModFileWriter::GetAsString(const Symbol &symbol) { @@ -710,6 +725,7 @@ void ModFileWriter::PutUse(const Symbol &symbol) { uses_ << "use,intrinsic::"; } else { uses_ << "use "; + usedNonIntrinsicModules_.insert(module); } uses_ << module.name() << ",only:"; PutGenericName(uses_, symbol); diff --git a/flang/lib/Semantics/mod-file.h b/flang/lib/Semantics/mod-file.h index b4ece4018c054d..739add32c2e0ee 100644 --- a/flang/lib/Semantics/mod-file.h +++ b/flang/lib/Semantics/mod-file.h @@ -35,6 +35,8 @@ class ModFileWriter { public: explicit ModFileWriter(SemanticsContext &context) : context_{context} {} bool WriteAll(); + void WriteClosure(llvm::raw_ostream &, const Symbol &, + UnorderedSymbolSet &nonIntrinsicModulesWritten); private: SemanticsContext &context_; @@ -46,6 +48,7 @@ class ModFileWriter { std::string containsBuf_; // Tracks nested DEC structures and fields of that type UnorderedSymbolSet emittedDECStructures_, emittedDECFields_; + UnorderedSymbolSet usedNonIntrinsicModules_; llvm::raw_string_ostream needs_{needsBuf_}; llvm::raw_string_ostream uses_{usesBuf_}; diff --git a/flang/lib/Semantics/unparse-with-symbols.cpp b/flang/lib/Semantics/unparse-with-symbols.cpp index 67016e85777c7e..c451f885c06279 100644 --- a/flang/lib/Semantics/unparse-with-symbols.cpp +++ b/flang/lib/Semantics/unparse-with-symbols.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "flang/Semantics/unparse-with-symbols.h" +#include "mod-file.h" #include "flang/Parser/parse-tree-visitor.h" #include "flang/Parser/parse-tree.h" #include "flang/Parser/unparse.h" @@ -98,4 +99,41 @@ void UnparseWithSymbols(llvm::raw_ostream &out, const parser::Program &program, int indent) { visitor.PrintSymbols(location, out, indent); }}; parser::Unparse(out, program, encoding, false, true, &preStatement); } + +// UnparseWithModules() + +class UsedModuleVisitor { +public: + UnorderedSymbolSet &modulesUsed() { return modulesUsed_; } + UnorderedSymbolSet &modulesDefined() { return modulesDefined_; } + template bool Pre(const T &) { return true; } + template void Post(const T &) {} + void Post(const parser::ModuleStmt &module) { + if (module.v.symbol) { + modulesDefined_.insert(*module.v.symbol); + } + } + void Post(const parser::UseStmt &use) { + if (use.moduleName.symbol) { + modulesUsed_.insert(*use.moduleName.symbol); + } + } + +private: + UnorderedSymbolSet modulesUsed_; + UnorderedSymbolSet modulesDefined_; +}; + +void UnparseWithModules(llvm::raw_ostream &out, SemanticsContext &context, + const parser::Program &program, parser::Encoding encoding) { + UsedModuleVisitor visitor; + parser::Walk(program, visitor); + UnorderedSymbolSet nonIntrinsicModulesWritten{ + std::move(visitor.modulesDefined())}; + ModFileWriter writer{context}; + for (SymbolRef moduleRef : visitor.modulesUsed()) { + writer.WriteClosure(out, *moduleRef, nonIntrinsicModulesWritten); + } + parser::Unparse(out, program, encoding, false, true); +} } // namespace Fortran::semantics diff --git a/flang/test/Driver/unparse-with-modules.f90 b/flang/test/Driver/unparse-with-modules.f90 new file mode 100644 index 00000000000000..53997f7804efa4 --- /dev/null +++ b/flang/test/Driver/unparse-with-modules.f90 @@ -0,0 +1,34 @@ +! RUN: %flang_fc1 -I %S/Inputs/module-dir -fdebug-unparse-with-modules %s | FileCheck %s +module m1 + use iso_fortran_env + use BasicTestModuleTwo + implicit none + type(t2) y + real(real32) x +end + +program test + use m1 + use BasicTestModuleTwo + implicit none + x = 123. + y = t2() +end + +!CHECK-NOT: module iso_fortran_env +!CHECK: module basictestmoduletwo +!CHECK: type::t2 +!CHECK: end type +!CHECK: end +!CHECK: module m1 +!CHECK: use :: iso_fortran_env +!CHECK: implicit none +!CHECK: real(kind=real32) x +!CHECK: end module +!CHECK: program test +!CHECK: use :: m1 +!CHECK: use :: basictestmoduletwo +!CHECK: implicit none +!CHECK: x = 123. +!CHECK: y = t2() +!CHECK: end program From 667d12f86e626173726e87e101626a9060b8d967 Mon Sep 17 00:00:00 2001 From: Krystian Stasiowski Date: Wed, 15 May 2024 18:55:53 -0400 Subject: [PATCH 07/16] [Clang][Sema] Do not mark template parameters in the exception specification as used during partial ordering (#91534) We do not deduce template arguments from the exception specification when determining the primary template of a function template specialization or when taking the address of a function template. Therefore, this patch changes `isAtLeastAsSpecializedAs` such that we do not mark template parameters in the exception specification as 'used' during partial ordering (per [temp.deduct.partial] p12) to prevent the following from being ambiguous: ``` template void f(U) noexcept(noexcept(T())); // #1 template void f(T*) noexcept; // #2 template<> void f(int*) noexcept; // currently ambiguous, selects #2 with this patch applied ``` Although there is no corresponding wording in the standard (see core issue filed here https://github.com/cplusplus/CWG/issues/537), this seems to be the intended behavior given the definition of _deduction substitution loci_ in [temp.deduct.general] p7 (and EDG does the same thing). --- clang/docs/ReleaseNotes.rst | 3 + clang/lib/Sema/SemaTemplateDeduction.cpp | 36 +++++++--- .../temp.deduct/temp.deduct.partial/p3.cpp | 72 +++++++++++++++++++ 3 files changed, 103 insertions(+), 8 deletions(-) create mode 100644 clang/test/CXX/temp/temp.fct.spec/temp.deduct/temp.deduct.partial/p3.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index ae699ebfc60383..6f7e54252150c6 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -713,6 +713,9 @@ Bug Fixes to C++ Support - Correctly treat the compound statement of an ``if consteval`` as an immediate context. Fixes (#GH91509). - When partial ordering alias templates against template template parameters, allow pack expansions when the alias has a fixed-size parameter list. Fixes (#GH62529). +- Clang now ignores template parameters only used within the exception specification of candidate function + templates during partial ordering when deducing template arguments from a function declaration or when + taking the address of a function template. Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 853c0e1b50619e..b5d405111fe4cb 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -5453,7 +5453,7 @@ static bool isAtLeastAsSpecializedAs(Sema &S, SourceLocation Loc, // is used. if (DeduceTemplateArgumentsByTypeMatch( S, TemplateParams, FD2->getType(), FD1->getType(), Info, Deduced, - TDF_None, + TDF_AllowCompatibleFunctionType, /*PartialOrdering=*/true) != TemplateDeductionResult::Success) return false; break; @@ -5485,20 +5485,40 @@ static bool isAtLeastAsSpecializedAs(Sema &S, SourceLocation Loc, switch (TPOC) { case TPOC_Call: for (unsigned I = 0, N = Args2.size(); I != N; ++I) - ::MarkUsedTemplateParameters(S.Context, Args2[I], false, - TemplateParams->getDepth(), - UsedParameters); + ::MarkUsedTemplateParameters(S.Context, Args2[I], /*OnlyDeduced=*/false, + TemplateParams->getDepth(), UsedParameters); break; case TPOC_Conversion: - ::MarkUsedTemplateParameters(S.Context, Proto2->getReturnType(), false, + ::MarkUsedTemplateParameters(S.Context, Proto2->getReturnType(), + /*OnlyDeduced=*/false, TemplateParams->getDepth(), UsedParameters); break; case TPOC_Other: - ::MarkUsedTemplateParameters(S.Context, FD2->getType(), false, - TemplateParams->getDepth(), - UsedParameters); + // We do not deduce template arguments from the exception specification + // when determining the primary template of a function template + // specialization or when taking the address of a function template. + // Therefore, we do not mark template parameters in the exception + // specification as used during partial ordering to prevent the following + // from being ambiguous: + // + // template + // void f(U) noexcept(noexcept(T())); // #1 + // + // template + // void f(T*) noexcept; // #2 + // + // template<> + // void f(int*) noexcept; // explicit specialization of #2 + // + // Although there is no corresponding wording in the standard, this seems + // to be the intended behavior given the definition of + // 'deduction substitution loci' in [temp.deduct]. + ::MarkUsedTemplateParameters( + S.Context, + S.Context.getFunctionTypeWithExceptionSpec(FD2->getType(), EST_None), + /*OnlyDeduced=*/false, TemplateParams->getDepth(), UsedParameters); break; } diff --git a/clang/test/CXX/temp/temp.fct.spec/temp.deduct/temp.deduct.partial/p3.cpp b/clang/test/CXX/temp/temp.fct.spec/temp.deduct/temp.deduct.partial/p3.cpp new file mode 100644 index 00000000000000..cc1d4ecda2ecca --- /dev/null +++ b/clang/test/CXX/temp/temp.fct.spec/temp.deduct/temp.deduct.partial/p3.cpp @@ -0,0 +1,72 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s +// expected-no-diagnostics + +template +struct A { }; + +constexpr A a; +constexpr A b; + +constexpr int* x = nullptr; +constexpr short* y = nullptr; + +namespace ExplicitArgs { + template + constexpr int f(U) noexcept(noexcept(T())) { + return 0; + } + + template + constexpr int f(T*) noexcept { + return 1; + } + + template<> + constexpr int f(int*) noexcept { + return 2; + } + + static_assert(f(1) == 0); + static_assert(f(y) == 1); + static_assert(f(x) == 2); + + template + constexpr int g(U*) noexcept(noexcept(T())) { + return 3; + } + + template + constexpr int g(T) noexcept { + return 4; + } + + template<> + constexpr int g(int*) noexcept { + return 5; + } + + static_assert(g(y) == 3); + static_assert(g(1) == 4); + static_assert(g(x) == 5); +} // namespace ExplicitArgs + +namespace DeducedArgs { + template + constexpr int f(T, A) noexcept(B) { + return 0; + } + + template + constexpr int f(T*, A) noexcept(B && B) { + return 1; + } + + template<> + constexpr int f(int*, A) { + return 2; + } + + static_assert(f(x, a) == 0); + static_assert(f(y, a) == 1); + static_assert(f(x, a) == 2); +} // namespace DeducedArgs From 325d1d0b73aa6bff0ce4174b45a7601f6b32a793 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Wed, 15 May 2024 15:58:20 -0700 Subject: [PATCH 08/16] [flang] Fix purity checking for internal subprograms (#91759) ELEMENTAL internal subprograms are pure unless explicitly IMPURE. --- flang/lib/Semantics/check-purity.cpp | 10 +++-- flang/test/Semantics/pure02.f90 | 59 ++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 flang/test/Semantics/pure02.f90 diff --git a/flang/lib/Semantics/check-purity.cpp b/flang/lib/Semantics/check-purity.cpp index 5176390f366bd5..55a9a2f107388d 100644 --- a/flang/lib/Semantics/check-purity.cpp +++ b/flang/lib/Semantics/check-purity.cpp @@ -39,12 +39,16 @@ bool PurityChecker::InPureSubprogram() const { bool PurityChecker::HasPurePrefix( const std::list &prefixes) const { + bool result{false}; for (const parser::PrefixSpec &prefix : prefixes) { - if (std::holds_alternative(prefix.u)) { - return true; + if (std::holds_alternative(prefix.u)) { + return false; + } else if (std::holds_alternative(prefix.u) || + std::holds_alternative(prefix.u)) { + result = true; } } - return false; + return result; } void PurityChecker::Entered( diff --git a/flang/test/Semantics/pure02.f90 b/flang/test/Semantics/pure02.f90 new file mode 100644 index 00000000000000..11dc0fd268293b --- /dev/null +++ b/flang/test/Semantics/pure02.f90 @@ -0,0 +1,59 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +pure subroutine s1 + contains + !ERROR: An internal subprogram of a pure subprogram must also be pure + subroutine t1 + end + pure subroutine t2 ! ok + end + elemental subroutine t3(k) ! ok + integer, intent(in) :: k + end + !ERROR: An internal subprogram of a pure subprogram must also be pure + impure elemental subroutine t4(k) + integer, intent(in) :: k + end + !ERROR: An internal subprogram of a pure subprogram must also be pure + elemental impure subroutine t5(k) + integer, intent(in) :: k + end +end + +elemental subroutine s2(j) + integer, intent(in) :: j + contains + !ERROR: An internal subprogram of a pure subprogram must also be pure + subroutine t1 + end + pure subroutine t2 ! ok + end + elemental subroutine t3(k) ! ok + integer, intent(in) :: k + end + !ERROR: An internal subprogram of a pure subprogram must also be pure + impure elemental subroutine t4(k) + integer, intent(in) :: k + end + !ERROR: An internal subprogram of a pure subprogram must also be pure + elemental impure subroutine t5(k) + integer, intent(in) :: k + end +end + +impure elemental subroutine s3(j) + integer, intent(in) :: j + contains + subroutine t1 + end + pure subroutine t2 + end + elemental subroutine t3(k) + integer, intent(in) :: k + end + impure elemental subroutine t4(k) + integer, intent(in) :: k + end + elemental impure subroutine t5(k) + integer, intent(in) :: k + end +end From c227bf1b217598066acd32de8c9a75c2e0928f89 Mon Sep 17 00:00:00 2001 From: Matheus Izvekov Date: Wed, 15 May 2024 20:01:17 -0300 Subject: [PATCH 09/16] [clang] Create new warning group for deprecation of '-fno-relaxed-template-template-args' (#92324) This allows the warning to be disabled in isolation, as it helps when treating them as errors. --- clang/docs/ReleaseNotes.rst | 3 ++- clang/include/clang/Basic/DiagnosticDriverKinds.td | 3 +++ clang/include/clang/Basic/DiagnosticGroups.td | 2 ++ clang/lib/Driver/ToolChains/Clang.cpp | 10 +++++++--- clang/test/Driver/frelaxed-template-template-args.cpp | 4 +++- 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6f7e54252150c6..089a85c8cb3659 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -51,7 +51,8 @@ C++ Specific Potentially Breaking Changes - The behavior controlled by the `-frelaxed-template-template-args` flag is now on by default, and the flag is deprecated. Until the flag is finally removed, it's negative spelling can be used to obtain compatibility with previous - versions of clang. + versions of clang. The deprecation warning for the negative spelling can be + disabled with `-Wno-deprecated-no-relaxed-template-template-args`. - Clang now rejects pointer to member from parenthesized expression in unevaluated context such as ``decltype(&(foo::bar))``. (#GH40906). diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 9781fcaa4ff5e9..9d97a75f696f66 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -436,6 +436,9 @@ def warn_drv_clang_unsupported : Warning< "the clang compiler does not support '%0'">; def warn_drv_deprecated_arg : Warning< "argument '%0' is deprecated%select{|, use '%2' instead}1">, InGroup; +def warn_drv_deprecated_arg_no_relaxed_template_template_args : Warning< + "argument '-fno-relaxed-template-template-args' is deprecated">, + InGroup; def warn_drv_deprecated_custom : Warning< "argument '%0' is deprecated, %1">, InGroup; def warn_drv_assuming_mfloat_abi_is : Warning< diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 2beb1d45124b49..4cb4f3d999f7ab 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -104,6 +104,7 @@ def EnumConversion : DiagGroup<"enum-conversion", [EnumEnumConversion, EnumFloatConversion, EnumCompareConditional]>; +def DeprecatedNoRelaxedTemplateTemplateArgs : DiagGroup<"deprecated-no-relaxed-template-template-args">; def ObjCSignedCharBoolImplicitIntConversion : DiagGroup<"objc-signed-char-bool-implicit-int-conversion">; def Shorten64To32 : DiagGroup<"shorten-64-to-32">; @@ -228,6 +229,7 @@ def Deprecated : DiagGroup<"deprecated", [DeprecatedAnonEnumEnumConversion, DeprecatedLiteralOperator, DeprecatedPragma, DeprecatedRegister, + DeprecatedNoRelaxedTemplateTemplateArgs, DeprecatedThisCapture, DeprecatedType, DeprecatedVolatile, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 42feb1650574ed..c3e6d563f3bd21 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7253,10 +7253,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Arg *A = Args.getLastArg(options::OPT_frelaxed_template_template_args, options::OPT_fno_relaxed_template_template_args)) { - D.Diag(diag::warn_drv_deprecated_arg) - << A->getAsString(Args) << /*hasReplacement=*/false; - if (A->getOption().matches(options::OPT_fno_relaxed_template_template_args)) + if (A->getOption().matches( + options::OPT_fno_relaxed_template_template_args)) { + D.Diag(diag::warn_drv_deprecated_arg_no_relaxed_template_template_args); CmdArgs.push_back("-fno-relaxed-template-template-args"); + } else { + D.Diag(diag::warn_drv_deprecated_arg) + << A->getAsString(Args) << /*hasReplacement=*/false; + } } // -fsized-deallocation is off by default, as it is an ABI-breaking change for diff --git a/clang/test/Driver/frelaxed-template-template-args.cpp b/clang/test/Driver/frelaxed-template-template-args.cpp index 57fc4e3da6e5d0..7a7fd6f0bbc8f9 100644 --- a/clang/test/Driver/frelaxed-template-template-args.cpp +++ b/clang/test/Driver/frelaxed-template-template-args.cpp @@ -1,7 +1,9 @@ // RUN: %clang -fsyntax-only -### %s 2>&1 | FileCheck --check-prefix=CHECK-DEF %s // RUN: %clang -fsyntax-only -frelaxed-template-template-args %s 2>&1 | FileCheck --check-prefix=CHECK-ON %s // RUN: %clang -fsyntax-only -fno-relaxed-template-template-args %s 2>&1 | FileCheck --check-prefix=CHECK-OFF %s +// RUN: %clang -fsyntax-only -fno-relaxed-template-template-args -Wno-deprecated-no-relaxed-template-template-args %s 2>&1 | FileCheck --check-prefix=CHECK-DIS --allow-empty %s // CHECK-DEF-NOT: "-cc1"{{.*}} "-fno-relaxed-template-template-args" // CHECK-ON: warning: argument '-frelaxed-template-template-args' is deprecated [-Wdeprecated] -// CHECK-OFF: warning: argument '-fno-relaxed-template-template-args' is deprecated [-Wdeprecated] +// CHECK-OFF: warning: argument '-fno-relaxed-template-template-args' is deprecated [-Wdeprecated-no-relaxed-template-template-args] +// CHECK-DIS-NOT: warning: argument '-fno-relaxed-template-template-args' is deprecated From 7605ad8a2f95e3b37de83e7fb3d320efc74e0ccc Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Wed, 15 May 2024 16:08:06 -0700 Subject: [PATCH 10/16] [flang] Always check procedure characterizability (#92008) When a procedure is defined with a subprogram but never referenced in a compilation unit, it may not be characterized until lowering, and any errors in characterization then may crash the compiler. So always ensure that procedure definitions are characterizable in declaration checking. Fixes https://github.com/llvm/llvm-project/issues/91845. --- flang/lib/Semantics/check-declarations.cpp | 9 +++++++++ flang/lib/Semantics/resolve-names.cpp | 3 +-- flang/test/Semantics/entry01.f90 | 2 ++ flang/test/Semantics/resolve102.f90 | 23 +++++----------------- 4 files changed, 17 insertions(+), 20 deletions(-) diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index ce7870b8d54e4d..8d17989ac2793f 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -1357,6 +1357,15 @@ bool CheckHelper::IsResultOkToDiffer(const FunctionResult &result) { void CheckHelper::CheckSubprogram( const Symbol &symbol, const SubprogramDetails &details) { + // Evaluate a procedure definition's characteristics to flush out + // any errors that analysis might expose, in case this subprogram hasn't + // had any calls in this compilation unit that would have validated them. + if (!context_.HasError(symbol) && !details.isDummy() && + !details.isInterface() && !details.stmtFunction()) { + if (!Procedure::Characterize(symbol, foldingContext_)) { + context_.SetError(symbol); + } + } if (const Symbol *iface{FindSeparateModuleSubprogramInterface(&symbol)}) { SubprogramMatchHelper{*this}.Check(symbol, *iface); } diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index e2875081b732c7..5626f2a8be97aa 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -5013,8 +5013,7 @@ bool DeclarationVisitor::HasCycle( if (procsInCycle.count(*interface) > 0) { for (const auto &procInCycle : procsInCycle) { Say(procInCycle->name(), - "The interface for procedure '%s' is recursively " - "defined"_err_en_US, + "The interface for procedure '%s' is recursively defined"_err_en_US, procInCycle->name()); context().SetError(*procInCycle); } diff --git a/flang/test/Semantics/entry01.f90 b/flang/test/Semantics/entry01.f90 index 970cd109921a15..765b18c2e81a8a 100644 --- a/flang/test/Semantics/entry01.f90 +++ b/flang/test/Semantics/entry01.f90 @@ -83,6 +83,7 @@ function ifunc() !ERROR: 'ibad1' is already declared in this scoping unit entry ibad1() result(ibad1res) ! C1570 !ERROR: 'ibad2' is already declared in this scoping unit + !ERROR: Procedure 'ibad2' is referenced before being sufficiently defined in a context where it must be so entry ibad2() !ERROR: ENTRY in a function may not have an alternate return dummy argument entry ibadalt(*) ! C1573 @@ -91,6 +92,7 @@ function ifunc() entry iok() !ERROR: Explicit RESULT('iok') of function 'isameres2' cannot have the same name as a distinct ENTRY into the same scope entry isameres2() result(iok) ! C1574 + !ERROR: Procedure 'iok2' is referenced before being sufficiently defined in a context where it must be so !ERROR: Explicit RESULT('iok2') of function 'isameres3' cannot have the same name as a distinct ENTRY into the same scope entry isameres3() result(iok2) ! C1574 !ERROR: 'iok2' is already declared in this scoping unit diff --git a/flang/test/Semantics/resolve102.f90 b/flang/test/Semantics/resolve102.f90 index 8f6e2246a57e79..33cf6fa245ea49 100644 --- a/flang/test/Semantics/resolve102.f90 +++ b/flang/test/Semantics/resolve102.f90 @@ -4,17 +4,12 @@ !ERROR: Procedure 'sub' is recursively defined. Procedures in the cycle: 'sub', 'p2' subroutine sub(p2) PROCEDURE(sub) :: p2 - - call sub() end subroutine subroutine circular - !ERROR: Procedure 'p' is recursively defined. Procedures in the cycle: 'p', 'sub', 'p2' procedure(sub) :: p - - call p(sub) - contains + !ERROR: Procedure 'sub' is recursively defined. Procedures in the cycle: 'p', 'sub', 'p2' subroutine sub(p2) procedure(p) :: p2 end subroutine @@ -41,11 +36,10 @@ subroutine sub(p2) subroutine mutual Procedure(sub1) :: p - - Call p(sub) - contains !ERROR: Procedure 'sub1' is recursively defined. Procedures in the cycle: 'p', 'sub1', 'arg' + !ERROR: Procedure 'sub1' is recursively defined. Procedures in the cycle: 'sub1', 'arg', 'sub', 'p2' + !ERROR: Procedure 'sub1' is recursively defined. Procedures in the cycle: 'sub1', 'arg' Subroutine sub1(arg) procedure(sub1) :: arg End Subroutine @@ -57,15 +51,14 @@ Subroutine sub(p2) subroutine mutual1 Procedure(sub1) :: p - - Call p(sub) - contains !ERROR: Procedure 'sub1' is recursively defined. Procedures in the cycle: 'p', 'sub1', 'arg', 'sub', 'p2' + !ERROR: Procedure 'sub1' is recursively defined. Procedures in the cycle: 'sub1', 'arg', 'sub', 'p2' Subroutine sub1(arg) procedure(sub) :: arg End Subroutine + !ERROR: Procedure 'sub' is recursively defined. Procedures in the cycle: 'sub1', 'arg', 'sub', 'p2' Subroutine sub(p2) Procedure(sub1) :: p2 End Subroutine @@ -76,8 +69,6 @@ subroutine twoCycle !ERROR: The interface for procedure 'p2' is recursively defined procedure(p1) p2 procedure(p2) p1 - call p1 - call p2 end subroutine subroutine threeCycle @@ -87,9 +78,6 @@ subroutine threeCycle !ERROR: The interface for procedure 'p3' is recursively defined procedure(p2) p3 procedure(p3) p1 - call p1 - call p2 - call p3 end subroutine module mutualSpecExprs @@ -118,4 +106,3 @@ function ifunc(x) ifunc = x end end - From 463f58a564a8d136b3e5d56d23bb86b99ab75245 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Wed, 15 May 2024 16:18:47 -0700 Subject: [PATCH 11/16] [flang] Further work on relaxing BIND(C) enforcement (#92029) When a BIND(C) interface or subprogram has a dummy argument whose derived type is not BIND(C) but meets the constraints and requirements of a BIND(C) type, accept it with a warning. --- flang/lib/Semantics/check-declarations.cpp | 16 +++++--- flang/test/Semantics/bind-c15.f90 | 45 ++++++++++++++++++++++ 2 files changed, 56 insertions(+), 5 deletions(-) create mode 100644 flang/test/Semantics/bind-c15.f90 diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index 8d17989ac2793f..527a1a9539aa6c 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -2891,7 +2891,8 @@ parser::Messages CheckHelper::WhyNotInteroperableDerivedType( } else { bool interoperableParent{true}; if (parent->symbol()) { - auto bad{WhyNotInteroperableDerivedType(*parent->symbol(), false)}; + auto bad{WhyNotInteroperableDerivedType( + *parent->symbol(), /*isError=*/false)}; if (bad.AnyFatalError()) { auto &msg{msgs.Say(symbol.name(), "The parent of an interoperable type is not interoperable"_err_en_US)}; @@ -2981,6 +2982,9 @@ parser::Messages CheckHelper::WhyNotInteroperableDerivedType( } } } + if (msgs.AnyFatalError()) { + examinedByWhyNotInteroperableDerivedType_.erase(symbol); + } return msgs; } @@ -3068,8 +3072,8 @@ void CheckHelper::CheckBindC(const Symbol &symbol) { } context_.SetError(symbol); } else if (auto bad{WhyNotInteroperableDerivedType( - derived->typeSymbol(), false)}; - !bad.empty()) { + derived->typeSymbol(), /*isError=*/false)}; + bad.AnyFatalError()) { if (auto *msg{messages_.Say(symbol.name(), "The derived type of an interoperable object must be interoperable, but is not"_err_en_US)}) { msg->Attach( @@ -3077,7 +3081,9 @@ void CheckHelper::CheckBindC(const Symbol &symbol) { bad.AttachTo(*msg, parser::Severity::None); } context_.SetError(symbol); - } else { + } else if (context_.ShouldWarn( + common::LanguageFeature::NonBindCInteroperability) && + !InModuleFile()) { if (auto *msg{messages_.Say(symbol.name(), "The derived type of an interoperable object should be BIND(C)"_warn_en_US)}) { msg->Attach(derived->typeSymbol().name(), "Non-BIND(C) type"_en_US); @@ -3151,7 +3157,7 @@ void CheckHelper::CheckBindC(const Symbol &symbol) { } } } else if (symbol.has()) { - if (auto msgs{WhyNotInteroperableDerivedType(symbol, false)}; + if (auto msgs{WhyNotInteroperableDerivedType(symbol, /*isError=*/false)}; !msgs.empty()) { bool anyFatal{msgs.AnyFatalError()}; if (msgs.AnyFatalError() || diff --git a/flang/test/Semantics/bind-c15.f90 b/flang/test/Semantics/bind-c15.f90 new file mode 100644 index 00000000000000..9aaad52cc0e0a4 --- /dev/null +++ b/flang/test/Semantics/bind-c15.f90 @@ -0,0 +1,45 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 -pedantic + +module m + type, bind(c) :: explicit_bind_c + real a + end type + type :: interoperable1 + type(explicit_bind_c) a + end type + type, extends(interoperable1) :: interoperable2 + real b + end type + type :: non_interoperable1 + real, allocatable :: a + end type + type :: non_interoperable2 + type(non_interoperable1) b + end type + interface + subroutine sub_bind_c_1(x_bind_c) bind(c) + import explicit_bind_c + type(explicit_bind_c), intent(in) :: x_bind_c + end + subroutine sub_bind_c_2(x_interop1) bind(c) + import interoperable1 + !WARNING: The derived type of an interoperable object should be BIND(C) + type(interoperable1), intent(in) :: x_interop1 + end + subroutine sub_bind_c_3(x_interop2) bind(c) + import interoperable2 + !WARNING: The derived type of an interoperable object should be BIND(C) + type(interoperable2), intent(in) :: x_interop2 + end + subroutine sub_bind_c_4(x_non_interop1) bind(c) + import non_interoperable1 + !ERROR: The derived type of an interoperable object must be interoperable, but is not + type(non_interoperable1), intent(in) :: x_non_interop1 + end + subroutine sub_bind_c_5(x_non_interop2) bind(c) + import non_interoperable2 + !ERROR: The derived type of an interoperable object must be interoperable, but is not + type(non_interoperable2), intent(in) :: x_non_interop2 + end + end interface +end From 5bbb63bd6d6d3929de643fcd88babbda20c97b69 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Wed, 15 May 2024 16:28:58 -0700 Subject: [PATCH 12/16] [flang] Parse REDUCE clauses in !$CUF KERNEL DO (#92154) A !$CUF KERNEL DO directive is allowed to have advisory REDUCE clauses similar to those in OpenACC and DO CONCURRENT. Parse and represent them. Semantic validation will follow. --- flang/include/flang/Parser/dump-parse-tree.h | 1 + flang/include/flang/Parser/parse-tree.h | 18 ++++- flang/lib/Parser/executable-parsers.cpp | 23 +++++-- flang/lib/Parser/openacc-parsers.cpp | 6 +- flang/lib/Parser/unparse.cpp | 36 +++++++++- flang/lib/Semantics/check-cuda.cpp | 44 ++++++++++++ flang/lib/Semantics/resolve-directives.h | 2 +- flang/lib/Semantics/resolve-names.cpp | 2 +- flang/test/Parser/cuf-sanity-common | 7 ++ flang/test/Parser/cuf-sanity-unparse.CUF | 6 ++ flang/test/Semantics/reduce.cuf | 72 ++++++++++++++++++++ 11 files changed, 199 insertions(+), 18 deletions(-) create mode 100644 flang/test/Semantics/reduce.cuf diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 477d391277ee25..68ae50c312cdeb 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -236,6 +236,7 @@ class ParseTreeDumper { NODE(parser, CUFKernelDoConstruct) NODE(CUFKernelDoConstruct, StarOrExpr) NODE(CUFKernelDoConstruct, Directive) + NODE(parser, CUFReduction) NODE(parser, CycleStmt) NODE(parser, DataComponentDefStmt) NODE(parser, DataIDoObject) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index c0635445837904..0a40aa8b8f616f 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4303,12 +4303,23 @@ struct OpenACCConstruct { }; // CUF-kernel-do-construct -> -// !$CUF KERNEL DO [ (scalar-int-constant-expr) ] <<< grid, block [, stream] -// >>> do-construct +// !$CUF KERNEL DO [ (scalar-int-constant-expr) ] +// <<< grid, block [, stream] >>> +// [ cuf-reduction... ] +// do-construct // star-or-expr -> * | scalar-int-expr // grid -> * | scalar-int-expr | ( star-or-expr-list ) // block -> * | scalar-int-expr | ( star-or-expr-list ) // stream -> 0, scalar-int-expr | STREAM = scalar-int-expr +// cuf-reduction -> [ REDUCE | REDUCTION ] ( +// acc-reduction-op : scalar-variable-list ) + +struct CUFReduction { + TUPLE_CLASS_BOILERPLATE(CUFReduction); + using Operator = AccReductionOperator; + std::tuple>> t; +}; + struct CUFKernelDoConstruct { TUPLE_CLASS_BOILERPLATE(CUFKernelDoConstruct); WRAPPER_CLASS(StarOrExpr, std::optional); @@ -4316,7 +4327,8 @@ struct CUFKernelDoConstruct { TUPLE_CLASS_BOILERPLATE(Directive); CharBlock source; std::tuple, std::list, - std::list, std::optional> + std::list, std::optional, + std::list> t; }; std::tuple> t; diff --git a/flang/lib/Parser/executable-parsers.cpp b/flang/lib/Parser/executable-parsers.cpp index 07a570bd61e990..382a593416872a 100644 --- a/flang/lib/Parser/executable-parsers.cpp +++ b/flang/lib/Parser/executable-parsers.cpp @@ -538,25 +538,34 @@ TYPE_CONTEXT_PARSER("UNLOCK statement"_en_US, construct("UNLOCK (" >> lockVariable, defaulted("," >> nonemptyList(statOrErrmsg)) / ")")) -// CUF-kernel-do-construct -> CUF-kernel-do-directive do-construct -// CUF-kernel-do-directive -> -// !$CUF KERNEL DO [ (scalar-int-constant-expr) ] <<< grid, block [, stream] -// >>> do-construct +// CUF-kernel-do-construct -> +// !$CUF KERNEL DO [ (scalar-int-constant-expr) ] +// <<< grid, block [, stream] >>> +// [ cuf-reduction... ] +// do-construct // star-or-expr -> * | scalar-int-expr // grid -> * | scalar-int-expr | ( star-or-expr-list ) // block -> * | scalar-int-expr | ( star-or-expr-list ) -// stream -> ( 0, | STREAM = ) scalar-int-expr +// stream -> 0, scalar-int-expr | STREAM = scalar-int-expr +// cuf-reduction -> [ REDUCTION | REDUCE ] ( +// acc-reduction-op : scalar-variable-list ) + constexpr auto starOrExpr{construct( "*" >> pure>() || applyFunction(presentOptional, scalarIntExpr))}; constexpr auto gridOrBlock{parenthesized(nonemptyList(starOrExpr)) || applyFunction(singletonList, starOrExpr)}; + +TYPE_PARSER(("REDUCTION"_tok || "REDUCE"_tok) >> + parenthesized(construct(Parser{}, + ":" >> nonemptyList(scalar(variable))))) + TYPE_PARSER(sourced(beginDirective >> "$CUF KERNEL DO"_tok >> construct( maybe(parenthesized(scalarIntConstantExpr)), "<<<" >> gridOrBlock, "," >> gridOrBlock, - maybe((", 0 ,"_tok || ", STREAM ="_tok) >> scalarIntExpr) / ">>>" / - endDirective))) + maybe((", 0 ,"_tok || ", STREAM ="_tok) >> scalarIntExpr) / ">>>", + many(Parser{}) / endDirective))) TYPE_CONTEXT_PARSER("!$CUF KERNEL DO construct"_en_US, extension(construct( Parser{}, diff --git a/flang/lib/Parser/openacc-parsers.cpp b/flang/lib/Parser/openacc-parsers.cpp index 946b33d0084a96..3d919e29a24826 100644 --- a/flang/lib/Parser/openacc-parsers.cpp +++ b/flang/lib/Parser/openacc-parsers.cpp @@ -19,9 +19,9 @@ // OpenACC Directives and Clauses namespace Fortran::parser { -constexpr auto startAccLine = skipStuffBeforeStatement >> - ("!$ACC "_sptok || "C$ACC "_sptok || "*$ACC "_sptok); -constexpr auto endAccLine = space >> endOfLine; +constexpr auto startAccLine{skipStuffBeforeStatement >> + ("!$ACC "_sptok || "C$ACC "_sptok || "*$ACC "_sptok)}; +constexpr auto endAccLine{space >> endOfLine}; // Autogenerated clauses parser. Information is taken from ACC.td and the // parser is generated by tablegen. diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 3398b395f198f8..1639e900903fe9 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2705,7 +2705,6 @@ class UnparseVisitor { void Unparse(const CLASS::ENUM &x) { Word(CLASS::EnumToString(x)); } WALK_NESTED_ENUM(AccDataModifier, Modifier) WALK_NESTED_ENUM(AccessSpec, Kind) // R807 - WALK_NESTED_ENUM(AccReductionOperator, Operator) WALK_NESTED_ENUM(common, TypeParamAttr) // R734 WALK_NESTED_ENUM(common, CUDADataAttr) // CUDA WALK_NESTED_ENUM(common, CUDASubprogramAttrs) // CUDA @@ -2736,6 +2735,31 @@ class UnparseVisitor { WALK_NESTED_ENUM(OmpOrderClause, Type) // OMP order-type WALK_NESTED_ENUM(OmpOrderModifier, Kind) // OMP order-modifier #undef WALK_NESTED_ENUM + void Unparse(const AccReductionOperator::Operator x) { + switch (x) { + case AccReductionOperator::Operator::Plus: + Word("+"); + break; + case AccReductionOperator::Operator::Multiply: + Word("*"); + break; + case AccReductionOperator::Operator::And: + Word(".AND."); + break; + case AccReductionOperator::Operator::Or: + Word(".OR."); + break; + case AccReductionOperator::Operator::Eqv: + Word(".EQV."); + break; + case AccReductionOperator::Operator::Neqv: + Word(".NEQV."); + break; + default: + Word(AccReductionOperator::EnumToString(x)); + break; + } + } void Unparse(const CUFKernelDoConstruct::StarOrExpr &x) { if (x.v) { @@ -2768,13 +2792,19 @@ class UnparseVisitor { if (const auto &stream{std::get<3>(x.t)}) { Word(",STREAM="), Walk(*stream); } - Word(">>>\n"); + Word(">>>"); + Walk(" ", std::get>(x.t), " "); + Word("\n"); } - void Unparse(const CUFKernelDoConstruct &x) { Walk(std::get(x.t)); Walk(std::get>(x.t)); } + void Unparse(const CUFReduction &x) { + Word("REDUCE("); + Walk(std::get(x.t)); + Walk(":", std::get>>(x.t), ",", ")"); + } void Done() const { CHECK(indent_ == 0); } diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp index 96ab9023926330..45217ed2e3ccd2 100644 --- a/flang/lib/Semantics/check-cuda.cpp +++ b/flang/lib/Semantics/check-cuda.cpp @@ -463,6 +463,46 @@ static int DoConstructTightNesting( return 1; } +static void CheckReduce( + SemanticsContext &context, const parser::CUFReduction &reduce) { + auto op{std::get(reduce.t).v}; + for (const auto &var : + std::get>>(reduce.t)) { + if (const auto &typedExprPtr{var.thing.typedExpr}; + typedExprPtr && typedExprPtr->v) { + const auto &expr{*typedExprPtr->v}; + if (auto type{expr.GetType()}) { + auto cat{type->category()}; + bool isOk{false}; + switch (op) { + case parser::AccReductionOperator::Operator::Plus: + case parser::AccReductionOperator::Operator::Multiply: + case parser::AccReductionOperator::Operator::Max: + case parser::AccReductionOperator::Operator::Min: + isOk = cat == TypeCategory::Integer || cat == TypeCategory::Real; + break; + case parser::AccReductionOperator::Operator::Iand: + case parser::AccReductionOperator::Operator::Ior: + case parser::AccReductionOperator::Operator::Ieor: + isOk = cat == TypeCategory::Integer; + break; + case parser::AccReductionOperator::Operator::And: + case parser::AccReductionOperator::Operator::Or: + case parser::AccReductionOperator::Operator::Eqv: + case parser::AccReductionOperator::Operator::Neqv: + isOk = cat == TypeCategory::Logical; + break; + } + if (!isOk) { + context.Say(var.thing.GetSource(), + "!$CUF KERNEL DO REDUCE operation is not acceptable for a variable with type %s"_err_en_US, + type->AsFortran()); + } + } + } + } +} + void CUDAChecker::Enter(const parser::CUFKernelDoConstruct &x) { auto source{std::get(x.t).source}; const auto &directive{std::get(x.t)}; @@ -489,6 +529,10 @@ void CUDAChecker::Enter(const parser::CUFKernelDoConstruct &x) { if (innerBlock) { DeviceContextChecker{context_}.Check(*innerBlock); } + for (const auto &reduce : + std::get>(directive.t)) { + CheckReduce(context_, reduce); + } } void CUDAChecker::Enter(const parser::AssignmentStmt &x) { diff --git a/flang/lib/Semantics/resolve-directives.h b/flang/lib/Semantics/resolve-directives.h index 4aef8ad6c40081..5a890c26aa3349 100644 --- a/flang/lib/Semantics/resolve-directives.h +++ b/flang/lib/Semantics/resolve-directives.h @@ -21,7 +21,7 @@ class SemanticsContext; // Name resolution for OpenACC and OpenMP directives void ResolveAccParts( - SemanticsContext &, const parser::ProgramUnit &, Scope *topScope = {}); + SemanticsContext &, const parser::ProgramUnit &, Scope *topScope); void ResolveOmpParts(SemanticsContext &, const parser::ProgramUnit &); void ResolveOmpTopLevelParts(SemanticsContext &, const parser::Program &); diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 5626f2a8be97aa..40eee89de131a3 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -8940,7 +8940,7 @@ bool ResolveNamesVisitor::Pre(const parser::ProgramUnit &x) { FinishSpecificationParts(root); ResolveExecutionParts(root); FinishExecutionParts(root); - ResolveAccParts(context(), x); + ResolveAccParts(context(), x, /*topScope=*/nullptr); ResolveOmpParts(context(), x); return false; } diff --git a/flang/test/Parser/cuf-sanity-common b/flang/test/Parser/cuf-sanity-common index b097a6aa300458..9d73204e3f5f6c 100644 --- a/flang/test/Parser/cuf-sanity-common +++ b/flang/test/Parser/cuf-sanity-common @@ -23,12 +23,19 @@ module m end subroutine subroutine test logical isPinned + real a(10), x, y, z !$cuf kernel do(1) <<<*, *, stream = 1>>> do j = 1, 10 end do !$cuf kernel do <<<1, (2, 3), stream = 1>>> do j = 1, 10 end do + !$cuf kernel do <<<*, *>>> reduce(+:x,y) reduce(*:z) + do j = 1, 10 + x = x + a(j) + y = y + a(j) + z = z * a(j) + end do call globalsub<<<1, 2>>> call globalsub<<<1, 2, 3>>> call globalsub<<<1, 2, 3, 4>>> diff --git a/flang/test/Parser/cuf-sanity-unparse.CUF b/flang/test/Parser/cuf-sanity-unparse.CUF index b6921e74fc05ae..d4be347dd044ea 100644 --- a/flang/test/Parser/cuf-sanity-unparse.CUF +++ b/flang/test/Parser/cuf-sanity-unparse.CUF @@ -34,6 +34,12 @@ include "cuf-sanity-common" !CHECK: !$CUF KERNEL DO <<<1_4,(2_4,3_4),STREAM=1_4>>> !CHECK: DO j=1_4,10_4 !CHECK: END DO +!CHECK: !$CUF KERNEL DO <<<*,*>>> REDUCE(+:x,y) REDUCE(*:z) +!CHECK: DO j=1_4,10_4 +!CHECK: x=x+a(int(j,kind=8)) +!CHECK: y=y+a(int(j,kind=8)) +!CHECK: z=z*a(int(j,kind=8)) +!CHECK: END DO !CHECK: CALL globalsub<<<1_4,2_4>>>() !CHECK: CALL globalsub<<<1_4,2_4,3_4>>>() !CHECK: CALL globalsub<<<1_4,2_4,3_4,4_4>>>() diff --git a/flang/test/Semantics/reduce.cuf b/flang/test/Semantics/reduce.cuf new file mode 100644 index 00000000000000..95ff2e87c09b47 --- /dev/null +++ b/flang/test/Semantics/reduce.cuf @@ -0,0 +1,72 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +subroutine s(n,m,a,l) + integer, intent(in) :: n + integer, intent(in) :: m(n) + real, intent(in) :: a(n) + logical, intent(in) :: l(n) + integer j, mr + real ar + logical lr +!$cuf kernel do <<<*,*>>> reduce (+:mr,ar) + do j=1,n; mr = mr + m(j); ar = ar + a(j); end do +!ERROR: !$CUF KERNEL DO REDUCE operation is not acceptable for a variable with type LOGICAL(4) +!$cuf kernel do <<<*,*>>> reduce (+:lr) + do j=1,n; end do +!$cuf kernel do <<<*,*>>> reduce (*:mr,ar) + do j=1,n; mr = mr * m(j); ar = ar * a(j); end do +!ERROR: !$CUF KERNEL DO REDUCE operation is not acceptable for a variable with type LOGICAL(4) +!$cuf kernel do <<<*,*>>> reduce (*:lr) + do j=1,n; end do +!$cuf kernel do <<<*,*>>> reduce (max:mr,ar) + do j=1,n; mr = max(mr,m(j)); ar = max(ar,a(j)); end do +!ERROR: !$CUF KERNEL DO REDUCE operation is not acceptable for a variable with type LOGICAL(4) +!$cuf kernel do <<<*,*>>> reduce (max:lr) + do j=1,n; end do +!$cuf kernel do <<<*,*>>> reduce (min:mr,ar) + do j=1,n; mr = min(mr,m(j)); ar = min(ar,a(j)); end do +!ERROR: !$CUF KERNEL DO REDUCE operation is not acceptable for a variable with type LOGICAL(4) +!$cuf kernel do <<<*,*>>> reduce (min:lr) + do j=1,n; end do +!$cuf kernel do <<<*,*>>> reduce (iand:mr) + do j=1,n; mr = iand(mr,m(j)); end do +!ERROR: !$CUF KERNEL DO REDUCE operation is not acceptable for a variable with type REAL(4) +!ERROR: !$CUF KERNEL DO REDUCE operation is not acceptable for a variable with type LOGICAL(4) +!$cuf kernel do <<<*,*>>> reduce (iand:ar,lr) + do j=1,n; end do +!$cuf kernel do <<<*,*>>> reduce (ieor:mr) + do j=1,n; mr = ieor(mr,m(j)); end do +!ERROR: !$CUF KERNEL DO REDUCE operation is not acceptable for a variable with type REAL(4) +!ERROR: !$CUF KERNEL DO REDUCE operation is not acceptable for a variable with type LOGICAL(4) +!$cuf kernel do <<<*,*>>> reduce (ieor:ar,lr) + do j=1,n; end do +!$cuf kernel do <<<*,*>>> reduce (ior:mr) + do j=1,n; mr = ior(mr,m(j)); end do +!ERROR: !$CUF KERNEL DO REDUCE operation is not acceptable for a variable with type REAL(4) +!ERROR: !$CUF KERNEL DO REDUCE operation is not acceptable for a variable with type LOGICAL(4) +!$cuf kernel do <<<*,*>>> reduce (ior:ar,lr) + do j=1,n; end do +!$cuf kernel do <<<*,*>>> reduce (.and.:lr) + do j=1,n; lr = lr .and. l(j); end do +!ERROR: !$CUF KERNEL DO REDUCE operation is not acceptable for a variable with type INTEGER(4) +!ERROR: !$CUF KERNEL DO REDUCE operation is not acceptable for a variable with type REAL(4) +!$cuf kernel do <<<*,*>>> reduce (.and.:mr,ar) + do j=1,n; end do +!$cuf kernel do <<<*,*>>> reduce (.eqv.:lr) + do j=1,n; lr = lr .eqv. l(j); end do +!ERROR: !$CUF KERNEL DO REDUCE operation is not acceptable for a variable with type INTEGER(4) +!ERROR: !$CUF KERNEL DO REDUCE operation is not acceptable for a variable with type REAL(4) +!$cuf kernel do <<<*,*>>> reduce (.eqv.:mr,ar) + do j=1,n; end do +!$cuf kernel do <<<*,*>>> reduce (.neqv.:lr) + do j=1,n; lr = lr .neqv. l(j); end do +!ERROR: !$CUF KERNEL DO REDUCE operation is not acceptable for a variable with type INTEGER(4) +!ERROR: !$CUF KERNEL DO REDUCE operation is not acceptable for a variable with type REAL(4) +!$cuf kernel do <<<*,*>>> reduce (.neqv.:mr,ar) + do j=1,n; end do +!$cuf kernel do <<<*,*>>> reduce (.or.:lr) + do j=1,n; lr = lr .or. l(j); end do +!ERROR: !$CUF KERNEL DO REDUCE operation is not acceptable for a variable with type INTEGER(4) +!ERROR: !$CUF KERNEL DO REDUCE operation is not acceptable for a variable with type REAL(4) +!$cuf kernel do <<<*,*>>> reduce (.or.:mr,ar) + do j=1,n; end do +end From 3ddfb6807e905868a3a9df71fa5ea87309181270 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Wed, 15 May 2024 16:41:12 -0700 Subject: [PATCH 13/16] [flang] Prevent crash from unfoldable TRANSFER() (#92282) When the MOLD= argument's type is polymorphic, the type of the result cannot be known at compilation time, so the call cannot be folded even when the SOURCE= is constant. Fixes https://github.com/llvm/llvm-project/issues/92264. --- flang/lib/Evaluate/fold.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/flang/lib/Evaluate/fold.cpp b/flang/lib/Evaluate/fold.cpp index ed882958199802..cf6262d9a7c65a 100644 --- a/flang/lib/Evaluate/fold.cpp +++ b/flang/lib/Evaluate/fold.cpp @@ -272,6 +272,7 @@ std::optional> FoldTransfer( } } if (sourceBytes && IsActuallyConstant(*source) && moldType && extents && + !moldType->IsPolymorphic() && (moldLength || moldType->category() != TypeCategory::Character)) { std::size_t elements{ extents->empty() ? 1 : static_cast((*extents)[0])}; From c87b1ca4edefe3c267a20f28eaf79f6b83d36c66 Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Wed, 15 May 2024 18:41:25 -0500 Subject: [PATCH 14/16] [InstrProf] Fix bug when clearing traces with samples (#92310) The `--temporal-profile-max-trace-length=0` flag in the `llvm-profdata merge` command is used to remove traces from a profile. There was a bug where traces would not be cleared if the profile was already sampled. This patch fixes that. --- llvm/lib/ProfileData/InstrProfWriter.cpp | 11 ++++++----- llvm/test/tools/llvm-profdata/trace-limit.proftext | 6 +++++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index b61c59aacc0f95..c941b9d89df388 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -320,11 +320,8 @@ void InstrProfWriter::addBinaryIds(ArrayRef BIs) { } void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) { - if (Trace.FunctionNameRefs.size() > MaxTemporalProfTraceLength) - Trace.FunctionNameRefs.resize(MaxTemporalProfTraceLength); - if (Trace.FunctionNameRefs.empty()) - return; - + assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength); + assert(!Trace.FunctionNameRefs.empty()); if (TemporalProfTraceStreamSize < TemporalProfTraceReservoirSize) { // Simply append the trace if we have not yet hit our reservoir size limit. TemporalProfTraces.push_back(std::move(Trace)); @@ -341,6 +338,10 @@ void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) { void InstrProfWriter::addTemporalProfileTraces( SmallVectorImpl &SrcTraces, uint64_t SrcStreamSize) { + for (auto &Trace : SrcTraces) + if (Trace.FunctionNameRefs.size() > MaxTemporalProfTraceLength) + Trace.FunctionNameRefs.resize(MaxTemporalProfTraceLength); + llvm::erase_if(SrcTraces, [](auto &T) { return T.FunctionNameRefs.empty(); }); // Assume that the source has the same reservoir size as the destination to // avoid needing to record it in the indexed profile format. bool IsDestSampled = diff --git a/llvm/test/tools/llvm-profdata/trace-limit.proftext b/llvm/test/tools/llvm-profdata/trace-limit.proftext index cf6edd648b23b2..e246ee890ba313 100644 --- a/llvm/test/tools/llvm-profdata/trace-limit.proftext +++ b/llvm/test/tools/llvm-profdata/trace-limit.proftext @@ -1,13 +1,17 @@ # RUN: llvm-profdata merge --temporal-profile-max-trace-length=0 %s -o %t.profdata # RUN: llvm-profdata show --temporal-profile-traces %t.profdata | FileCheck %s --check-prefix=NONE +# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=2 %s %s %s %s -o %t.profdata +# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=2 --temporal-profile-max-trace-length=0 %t.profdata -o %t.profdata +# RUN: llvm-profdata show --temporal-profile-traces %t.profdata | FileCheck %s --check-prefix=NONE + # RUN: llvm-profdata merge --temporal-profile-max-trace-length=2 %s -o %t.profdata # RUN: llvm-profdata show --temporal-profile-traces %t.profdata | FileCheck %s --check-prefixes=CHECK,SOME # RUN: llvm-profdata merge --temporal-profile-max-trace-length=1000 %s -o %t.profdata # RUN: llvm-profdata show --temporal-profile-traces %t.profdata | FileCheck %s --check-prefixes=CHECK,ALL -# NONE: Temporal Profile Traces (samples=0 seen=0): +# NONE: Temporal Profile Traces (samples=0 # CHECK: Temporal Profile Traces (samples=1 seen=1): # SOME: Trace 0 (weight=1 count=2): # ALL: Trace 0 (weight=1 count=3): From c00e012bcf5da384a3e7339dc2e046779b339063 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 15 May 2024 17:03:09 -0700 Subject: [PATCH 15/16] [ctx_profile] Follow the pattern elsewhere for choosing the block IDs This was an oversight in #91859. Using the subblock ID mechanism other places that use the bitstream APIs (e.g. `BitstreamRemarkSerializer`) use. --- llvm/include/llvm/ProfileData/PGOCtxProfWriter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h b/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h index 15578c51a49578..edcf02c0946971 100644 --- a/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h +++ b/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h @@ -13,6 +13,7 @@ #ifndef LLVM_PROFILEDATA_PGOCTXPROFWRITER_H_ #define LLVM_PROFILEDATA_PGOCTXPROFWRITER_H_ +#include "llvm/Bitstream/BitCodeEnums.h" #include "llvm/Bitstream/BitstreamWriter.h" #include "llvm/ProfileData/CtxInstrContextNode.h" @@ -20,7 +21,7 @@ namespace llvm { enum PGOCtxProfileRecords { Invalid = 0, Version, Guid, CalleeIndex, Counters }; enum PGOCtxProfileBlockIDs { - ProfileMetadataBlockID = 100, + ProfileMetadataBlockID = bitc::FIRST_APPLICATION_BLOCKID, ContextNodeBlockID = ProfileMetadataBlockID + 1 }; From 772b1b0cb26c66804d0a7e416dc7a5742b7f8db2 Mon Sep 17 00:00:00 2001 From: ChiaHungDuan Date: Wed, 15 May 2024 17:13:08 -0700 Subject: [PATCH 16/16] [scudo] Move the chunk update into functions (#83493) The code paths for mte enabled and disabled were interleaving and which increases the difficulty of reading each path in both source level and assembly level. In this change, we move the parts that they have different logic into functions and minor refactors on the code structure. --- compiler-rt/lib/scudo/standalone/combined.h | 371 ++++++++++++-------- 1 file changed, 221 insertions(+), 150 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h index 927513dea92dab..15a199ae0349ba 100644 --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -410,133 +410,18 @@ class Allocator { reportOutOfMemory(NeededSize); } - const uptr BlockUptr = reinterpret_cast(Block); - const uptr UnalignedUserPtr = BlockUptr + Chunk::getHeaderSize(); - const uptr UserPtr = roundUp(UnalignedUserPtr, Alignment); - - void *Ptr = reinterpret_cast(UserPtr); - void *TaggedPtr = Ptr; - if (LIKELY(ClassId)) { - // We only need to zero or tag the contents for Primary backed - // allocations. We only set tags for primary allocations in order to avoid - // faulting potentially large numbers of pages for large secondary - // allocations. We assume that guard pages are enough to protect these - // allocations. - // - // FIXME: When the kernel provides a way to set the background tag of a - // mapping, we should be able to tag secondary allocations as well. - // - // When memory tagging is enabled, zeroing the contents is done as part of - // setting the tag. - if (UNLIKELY(useMemoryTagging(Options))) { - uptr PrevUserPtr; - Chunk::UnpackedHeader Header; - const uptr BlockSize = PrimaryT::getSizeByClassId(ClassId); - const uptr BlockEnd = BlockUptr + BlockSize; - // If possible, try to reuse the UAF tag that was set by deallocate(). - // For simplicity, only reuse tags if we have the same start address as - // the previous allocation. This handles the majority of cases since - // most allocations will not be more aligned than the minimum alignment. - // - // We need to handle situations involving reclaimed chunks, and retag - // the reclaimed portions if necessary. In the case where the chunk is - // fully reclaimed, the chunk's header will be zero, which will trigger - // the code path for new mappings and invalid chunks that prepares the - // chunk from scratch. There are three possibilities for partial - // reclaiming: - // - // (1) Header was reclaimed, data was partially reclaimed. - // (2) Header was not reclaimed, all data was reclaimed (e.g. because - // data started on a page boundary). - // (3) Header was not reclaimed, data was partially reclaimed. - // - // Case (1) will be handled in the same way as for full reclaiming, - // since the header will be zero. - // - // We can detect case (2) by loading the tag from the start - // of the chunk. If it is zero, it means that either all data was - // reclaimed (since we never use zero as the chunk tag), or that the - // previous allocation was of size zero. Either way, we need to prepare - // a new chunk from scratch. - // - // We can detect case (3) by moving to the next page (if covered by the - // chunk) and loading the tag of its first granule. If it is zero, it - // means that all following pages may need to be retagged. On the other - // hand, if it is nonzero, we can assume that all following pages are - // still tagged, according to the logic that if any of the pages - // following the next page were reclaimed, the next page would have been - // reclaimed as well. - uptr TaggedUserPtr; - if (getChunkFromBlock(BlockUptr, &PrevUserPtr, &Header) && - PrevUserPtr == UserPtr && - (TaggedUserPtr = loadTag(UserPtr)) != UserPtr) { - uptr PrevEnd = TaggedUserPtr + Header.SizeOrUnusedBytes; - const uptr NextPage = roundUp(TaggedUserPtr, getPageSizeCached()); - if (NextPage < PrevEnd && loadTag(NextPage) != NextPage) - PrevEnd = NextPage; - TaggedPtr = reinterpret_cast(TaggedUserPtr); - resizeTaggedChunk(PrevEnd, TaggedUserPtr + Size, Size, BlockEnd); - if (UNLIKELY(FillContents != NoFill && !Header.OriginOrWasZeroed)) { - // If an allocation needs to be zeroed (i.e. calloc) we can normally - // avoid zeroing the memory now since we can rely on memory having - // been zeroed on free, as this is normally done while setting the - // UAF tag. But if tagging was disabled per-thread when the memory - // was freed, it would not have been retagged and thus zeroed, and - // therefore it needs to be zeroed now. - memset(TaggedPtr, 0, - Min(Size, roundUp(PrevEnd - TaggedUserPtr, - archMemoryTagGranuleSize()))); - } else if (Size) { - // Clear any stack metadata that may have previously been stored in - // the chunk data. - memset(TaggedPtr, 0, archMemoryTagGranuleSize()); - } - } else { - const uptr OddEvenMask = - computeOddEvenMaskForPointerMaybe(Options, BlockUptr, ClassId); - TaggedPtr = prepareTaggedChunk(Ptr, Size, OddEvenMask, BlockEnd); - } - storePrimaryAllocationStackMaybe(Options, Ptr); - } else { - Block = addHeaderTag(Block); - Ptr = addHeaderTag(Ptr); - if (UNLIKELY(FillContents != NoFill)) { - // This condition is not necessarily unlikely, but since memset is - // costly, we might as well mark it as such. - memset(Block, FillContents == ZeroFill ? 0 : PatternFillByte, - PrimaryT::getSizeByClassId(ClassId)); - } - } - } else { - Block = addHeaderTag(Block); - Ptr = addHeaderTag(Ptr); - if (UNLIKELY(useMemoryTagging(Options))) { - storeTags(reinterpret_cast(Block), reinterpret_cast(Ptr)); - storeSecondaryAllocationStackMaybe(Options, Ptr, Size); - } + const uptr UserPtr = roundUp( + reinterpret_cast(Block) + Chunk::getHeaderSize(), Alignment); + const uptr SizeOrUnusedBytes = + ClassId ? Size : SecondaryBlockEnd - (UserPtr + Size); + + if (LIKELY(!useMemoryTagging(Options))) { + return initChunk(ClassId, Origin, Block, UserPtr, SizeOrUnusedBytes, + FillContents); } - Chunk::UnpackedHeader Header = {}; - if (UNLIKELY(UnalignedUserPtr != UserPtr)) { - const uptr Offset = UserPtr - UnalignedUserPtr; - DCHECK_GE(Offset, 2 * sizeof(u32)); - // The BlockMarker has no security purpose, but is specifically meant for - // the chunk iteration function that can be used in debugging situations. - // It is the only situation where we have to locate the start of a chunk - // based on its block address. - reinterpret_cast(Block)[0] = BlockMarker; - reinterpret_cast(Block)[1] = static_cast(Offset); - Header.Offset = (Offset >> MinAlignmentLog) & Chunk::OffsetMask; - } - Header.ClassId = ClassId & Chunk::ClassIdMask; - Header.State = Chunk::State::Allocated; - Header.OriginOrWasZeroed = Origin & Chunk::OriginMask; - Header.SizeOrUnusedBytes = - (ClassId ? Size : SecondaryBlockEnd - (UserPtr + Size)) & - Chunk::SizeOrUnusedBytesMask; - Chunk::storeHeader(Cookie, Ptr, &Header); - - return TaggedPtr; + return initChunkWithMemoryTagging(ClassId, Origin, Block, UserPtr, Size, + SizeOrUnusedBytes, FillContents); } NOINLINE void deallocate(void *Ptr, Chunk::Origin Origin, uptr DeleteSize = 0, @@ -1163,6 +1048,175 @@ class Allocator { reinterpret_cast(Ptr) - SizeOrUnusedBytes; } + ALWAYS_INLINE void *initChunk(const uptr ClassId, const Chunk::Origin Origin, + void *Block, const uptr UserPtr, + const uptr SizeOrUnusedBytes, + const FillContentsMode FillContents) { + Block = addHeaderTag(Block); + // Only do content fill when it's from primary allocator because secondary + // allocator has filled the content. + if (ClassId != 0 && UNLIKELY(FillContents != NoFill)) { + // This condition is not necessarily unlikely, but since memset is + // costly, we might as well mark it as such. + memset(Block, FillContents == ZeroFill ? 0 : PatternFillByte, + PrimaryT::getSizeByClassId(ClassId)); + } + + Chunk::UnpackedHeader Header = {}; + + const uptr DefaultAlignedPtr = + reinterpret_cast(Block) + Chunk::getHeaderSize(); + if (UNLIKELY(DefaultAlignedPtr != UserPtr)) { + const uptr Offset = UserPtr - DefaultAlignedPtr; + DCHECK_GE(Offset, 2 * sizeof(u32)); + // The BlockMarker has no security purpose, but is specifically meant for + // the chunk iteration function that can be used in debugging situations. + // It is the only situation where we have to locate the start of a chunk + // based on its block address. + reinterpret_cast(Block)[0] = BlockMarker; + reinterpret_cast(Block)[1] = static_cast(Offset); + Header.Offset = (Offset >> MinAlignmentLog) & Chunk::OffsetMask; + } + + Header.ClassId = ClassId & Chunk::ClassIdMask; + Header.State = Chunk::State::Allocated; + Header.OriginOrWasZeroed = Origin & Chunk::OriginMask; + Header.SizeOrUnusedBytes = SizeOrUnusedBytes & Chunk::SizeOrUnusedBytesMask; + Chunk::storeHeader(Cookie, reinterpret_cast(addHeaderTag(UserPtr)), + &Header); + + return reinterpret_cast(UserPtr); + } + + NOINLINE void * + initChunkWithMemoryTagging(const uptr ClassId, const Chunk::Origin Origin, + void *Block, const uptr UserPtr, const uptr Size, + const uptr SizeOrUnusedBytes, + const FillContentsMode FillContents) { + const Options Options = Primary.Options.load(); + DCHECK(useMemoryTagging(Options)); + + void *Ptr = reinterpret_cast(UserPtr); + void *TaggedPtr = Ptr; + + if (LIKELY(ClassId)) { + // Init the primary chunk. + // + // We only need to zero or tag the contents for Primary backed + // allocations. We only set tags for primary allocations in order to avoid + // faulting potentially large numbers of pages for large secondary + // allocations. We assume that guard pages are enough to protect these + // allocations. + // + // FIXME: When the kernel provides a way to set the background tag of a + // mapping, we should be able to tag secondary allocations as well. + // + // When memory tagging is enabled, zeroing the contents is done as part of + // setting the tag. + + Chunk::UnpackedHeader Header; + const uptr BlockSize = PrimaryT::getSizeByClassId(ClassId); + const uptr BlockUptr = reinterpret_cast(Block); + const uptr BlockEnd = BlockUptr + BlockSize; + // If possible, try to reuse the UAF tag that was set by deallocate(). + // For simplicity, only reuse tags if we have the same start address as + // the previous allocation. This handles the majority of cases since + // most allocations will not be more aligned than the minimum alignment. + // + // We need to handle situations involving reclaimed chunks, and retag + // the reclaimed portions if necessary. In the case where the chunk is + // fully reclaimed, the chunk's header will be zero, which will trigger + // the code path for new mappings and invalid chunks that prepares the + // chunk from scratch. There are three possibilities for partial + // reclaiming: + // + // (1) Header was reclaimed, data was partially reclaimed. + // (2) Header was not reclaimed, all data was reclaimed (e.g. because + // data started on a page boundary). + // (3) Header was not reclaimed, data was partially reclaimed. + // + // Case (1) will be handled in the same way as for full reclaiming, + // since the header will be zero. + // + // We can detect case (2) by loading the tag from the start + // of the chunk. If it is zero, it means that either all data was + // reclaimed (since we never use zero as the chunk tag), or that the + // previous allocation was of size zero. Either way, we need to prepare + // a new chunk from scratch. + // + // We can detect case (3) by moving to the next page (if covered by the + // chunk) and loading the tag of its first granule. If it is zero, it + // means that all following pages may need to be retagged. On the other + // hand, if it is nonzero, we can assume that all following pages are + // still tagged, according to the logic that if any of the pages + // following the next page were reclaimed, the next page would have been + // reclaimed as well. + uptr TaggedUserPtr; + uptr PrevUserPtr; + if (getChunkFromBlock(BlockUptr, &PrevUserPtr, &Header) && + PrevUserPtr == UserPtr && + (TaggedUserPtr = loadTag(UserPtr)) != UserPtr) { + uptr PrevEnd = TaggedUserPtr + Header.SizeOrUnusedBytes; + const uptr NextPage = roundUp(TaggedUserPtr, getPageSizeCached()); + if (NextPage < PrevEnd && loadTag(NextPage) != NextPage) + PrevEnd = NextPage; + TaggedPtr = reinterpret_cast(TaggedUserPtr); + resizeTaggedChunk(PrevEnd, TaggedUserPtr + Size, Size, BlockEnd); + if (UNLIKELY(FillContents != NoFill && !Header.OriginOrWasZeroed)) { + // If an allocation needs to be zeroed (i.e. calloc) we can normally + // avoid zeroing the memory now since we can rely on memory having + // been zeroed on free, as this is normally done while setting the + // UAF tag. But if tagging was disabled per-thread when the memory + // was freed, it would not have been retagged and thus zeroed, and + // therefore it needs to be zeroed now. + memset(TaggedPtr, 0, + Min(Size, roundUp(PrevEnd - TaggedUserPtr, + archMemoryTagGranuleSize()))); + } else if (Size) { + // Clear any stack metadata that may have previously been stored in + // the chunk data. + memset(TaggedPtr, 0, archMemoryTagGranuleSize()); + } + } else { + const uptr OddEvenMask = + computeOddEvenMaskForPointerMaybe(Options, BlockUptr, ClassId); + TaggedPtr = prepareTaggedChunk(Ptr, Size, OddEvenMask, BlockEnd); + } + storePrimaryAllocationStackMaybe(Options, Ptr); + } else { + // Init the secondary chunk. + + Block = addHeaderTag(Block); + Ptr = addHeaderTag(Ptr); + storeTags(reinterpret_cast(Block), reinterpret_cast(Ptr)); + storeSecondaryAllocationStackMaybe(Options, Ptr, Size); + } + + Chunk::UnpackedHeader Header = {}; + + const uptr DefaultAlignedPtr = + reinterpret_cast(Block) + Chunk::getHeaderSize(); + if (UNLIKELY(DefaultAlignedPtr != UserPtr)) { + const uptr Offset = UserPtr - DefaultAlignedPtr; + DCHECK_GE(Offset, 2 * sizeof(u32)); + // The BlockMarker has no security purpose, but is specifically meant for + // the chunk iteration function that can be used in debugging situations. + // It is the only situation where we have to locate the start of a chunk + // based on its block address. + reinterpret_cast(Block)[0] = BlockMarker; + reinterpret_cast(Block)[1] = static_cast(Offset); + Header.Offset = (Offset >> MinAlignmentLog) & Chunk::OffsetMask; + } + + Header.ClassId = ClassId & Chunk::ClassIdMask; + Header.State = Chunk::State::Allocated; + Header.OriginOrWasZeroed = Origin & Chunk::OriginMask; + Header.SizeOrUnusedBytes = SizeOrUnusedBytes & Chunk::SizeOrUnusedBytesMask; + Chunk::storeHeader(Cookie, Ptr, &Header); + + return TaggedPtr; + } + void quarantineOrDeallocateChunk(const Options &Options, void *TaggedPtr, Chunk::UnpackedHeader *Header, uptr Size) NO_THREAD_SAFETY_ANALYSIS { @@ -1177,31 +1231,23 @@ class Allocator { Header->State = Chunk::State::Available; else Header->State = Chunk::State::Quarantined; - Header->OriginOrWasZeroed = useMemoryTagging(Options) && - Header->ClassId && - !TSDRegistry.getDisableMemInit(); - Chunk::storeHeader(Cookie, Ptr, Header); - if (UNLIKELY(useMemoryTagging(Options))) { - u8 PrevTag = extractTag(reinterpret_cast(TaggedPtr)); - storeDeallocationStackMaybe(Options, Ptr, PrevTag, Size); - if (Header->ClassId) { - if (!TSDRegistry.getDisableMemInit()) { - uptr TaggedBegin, TaggedEnd; - const uptr OddEvenMask = computeOddEvenMaskForPointerMaybe( - Options, reinterpret_cast(getBlockBegin(Ptr, Header)), - Header->ClassId); - // Exclude the previous tag so that immediate use after free is - // detected 100% of the time. - setRandomTag(Ptr, Size, OddEvenMask | (1UL << PrevTag), &TaggedBegin, - &TaggedEnd); - } - } + void *BlockBegin; + if (LIKELY(!useMemoryTagging(Options))) { + Header->OriginOrWasZeroed = 0U; + if (BypassQuarantine && allocatorSupportsMemoryTagging()) + Ptr = untagPointer(Ptr); + BlockBegin = getBlockBegin(Ptr, Header); + } else { + Header->OriginOrWasZeroed = + Header->ClassId && !TSDRegistry.getDisableMemInit(); + BlockBegin = + retagBlock(Options, TaggedPtr, Ptr, Header, Size, BypassQuarantine); } + + Chunk::storeHeader(Cookie, Ptr, Header); + if (BypassQuarantine) { - if (allocatorSupportsMemoryTagging()) - Ptr = untagPointer(Ptr); - void *BlockBegin = getBlockBegin(Ptr, Header); const uptr ClassId = Header->ClassId; if (LIKELY(ClassId)) { bool CacheDrained; @@ -1216,9 +1262,6 @@ class Allocator { if (CacheDrained) Primary.tryReleaseToOS(ClassId, ReleaseToOS::Normal); } else { - if (UNLIKELY(useMemoryTagging(Options))) - storeTags(reinterpret_cast(BlockBegin), - reinterpret_cast(Ptr)); Secondary.deallocate(Options, BlockBegin); } } else { @@ -1228,6 +1271,34 @@ class Allocator { } } + NOINLINE void *retagBlock(const Options &Options, void *TaggedPtr, void *&Ptr, + Chunk::UnpackedHeader *Header, const uptr Size, + bool BypassQuarantine) { + DCHECK(useMemoryTagging(Options)); + + const u8 PrevTag = extractTag(reinterpret_cast(TaggedPtr)); + storeDeallocationStackMaybe(Options, Ptr, PrevTag, Size); + if (Header->ClassId && !TSDRegistry.getDisableMemInit()) { + uptr TaggedBegin, TaggedEnd; + const uptr OddEvenMask = computeOddEvenMaskForPointerMaybe( + Options, reinterpret_cast(getBlockBegin(Ptr, Header)), + Header->ClassId); + // Exclude the previous tag so that immediate use after free is + // detected 100% of the time. + setRandomTag(Ptr, Size, OddEvenMask | (1UL << PrevTag), &TaggedBegin, + &TaggedEnd); + } + + Ptr = untagPointer(Ptr); + void *BlockBegin = getBlockBegin(Ptr, Header); + if (BypassQuarantine && !Header->ClassId) { + storeTags(reinterpret_cast(BlockBegin), + reinterpret_cast(Ptr)); + } + + return BlockBegin; + } + bool getChunkFromBlock(uptr Block, uptr *Chunk, Chunk::UnpackedHeader *Header) { *Chunk =