diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml index b3e2c8243a01c9..f79d1a9d24583d 100644 --- a/.github/new-prs-labeler.yml +++ b/.github/new-prs-labeler.yml @@ -775,6 +775,29 @@ backend:AArch64: - clang/include/clang/Sema/SemaARM.h - clang/lib/Sema/SemaARM.cpp +backend:Hexagon: + - clang/include/clang/Basic/BuiltinsHexagon*.def + - clang/include/clang/Sema/SemaHexagon.h + - clang/lib/Basic/Targets/Hexagon.* + - clang/lib/CodeGen/Targets/Hexagon.cpp + - clang/lib/Driver/ToolChains/Hexagon.* + - clang/lib/Sema/SemaHexagon.cpp + - lld/ELF/Arch/Hexagon.cpp + - lldb/source/Plugins/ABI/Hexagon/** + - lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/** + - llvm/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def + - llvm/include/llvm/IR/IntrinsicsHexagon* + - llvm/include/llvm/Support/Hexagon* + - llvm/lib/Support/Hexagon* + - llvm/lib/Target/Hexagon/** + - llvm/test/CodeGen/Hexagon/** + - llvm/test/CodeGen/*/Hexagon/** + - llvm/test/DebugInfo/*/Hexagon/** + - llvm/test/Transforms/*/Hexagon + - llvm/test/MC/Disassembler/Hexagon/** + - llvm/test/MC/Hexagon/** + - llvm/test/tools/llvm-objdump/ELF/Hexagon/** + backend:loongarch: - llvm/include/llvm/IR/IntrinsicsLoongArch.td - llvm/test/MC/LoongArch/** diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 89c05e3ab005b2..282638f9dc76f9 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -2538,6 +2538,7 @@ struct CFISnapshot { case MCCFIInstruction::OpWindowSave: case MCCFIInstruction::OpNegateRAState: case MCCFIInstruction::OpLLVMDefAspaceCfa: + case MCCFIInstruction::OpLabel: llvm_unreachable("unsupported CFI opcode"); break; case MCCFIInstruction::OpRememberState: @@ -2675,6 +2676,7 @@ struct CFISnapshotDiff : public CFISnapshot { case MCCFIInstruction::OpWindowSave: case MCCFIInstruction::OpNegateRAState: case MCCFIInstruction::OpLLVMDefAspaceCfa: + case MCCFIInstruction::OpLabel: llvm_unreachable("unsupported CFI opcode"); return 
false; case MCCFIInstruction::OpRememberState: @@ -2823,6 +2825,7 @@ BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState, case MCCFIInstruction::OpWindowSave: case MCCFIInstruction::OpNegateRAState: case MCCFIInstruction::OpLLVMDefAspaceCfa: + case MCCFIInstruction::OpLabel: llvm_unreachable("unsupported CFI opcode"); break; case MCCFIInstruction::OpGnuArgsSize: diff --git a/clang-tools-extra/clang-tidy/boost/BoostTidyModule.cpp b/clang-tools-extra/clang-tidy/boost/BoostTidyModule.cpp index 4c5808daa6ae74..79d0e380e402d7 100644 --- a/clang-tools-extra/clang-tidy/boost/BoostTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/boost/BoostTidyModule.cpp @@ -9,6 +9,7 @@ #include "../ClangTidy.h" #include "../ClangTidyModule.h" #include "../ClangTidyModuleRegistry.h" +#include "UseRangesCheck.h" #include "UseToStringCheck.h" using namespace clang::ast_matchers; @@ -18,6 +19,7 @@ namespace boost { class BoostModule : public ClangTidyModule { public: void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override { + CheckFactories.registerCheck("boost-use-ranges"); CheckFactories.registerCheck("boost-use-to-string"); } }; diff --git a/clang-tools-extra/clang-tidy/boost/CMakeLists.txt b/clang-tools-extra/clang-tidy/boost/CMakeLists.txt index 167b6fab774b7c..fed3c3ba01c169 100644 --- a/clang-tools-extra/clang-tidy/boost/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/boost/CMakeLists.txt @@ -5,6 +5,7 @@ set(LLVM_LINK_COMPONENTS add_clang_library(clangTidyBoostModule BoostTidyModule.cpp + UseRangesCheck.cpp UseToStringCheck.cpp LINK_LIBS diff --git a/clang-tools-extra/clang-tidy/boost/UseRangesCheck.cpp b/clang-tools-extra/clang-tidy/boost/UseRangesCheck.cpp new file mode 100644 index 00000000000000..9351a1c90ae546 --- /dev/null +++ b/clang-tools-extra/clang-tidy/boost/UseRangesCheck.cpp @@ -0,0 +1,371 @@ +//===--- UseRangesCheck.cpp - clang-tidy ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 
with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "UseRangesCheck.h" +#include "clang/AST/Decl.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include +#include +#include + +// FixItHint - Let the docs script know that this class does provide fixits + +namespace clang::tidy::boost { + +namespace { +/// Base replacer that handles the boost include path and namespace +class BoostReplacer : public UseRangesCheck::Replacer { +public: + BoostReplacer(ArrayRef Signatures, + bool IncludeSystem) + : Signatures(Signatures), IncludeSystem(IncludeSystem) {} + + ArrayRef getReplacementSignatures() const final { + return Signatures; + } + + virtual std::pair + getBoostName(const NamedDecl &OriginalName) const = 0; + + virtual std::pair + getBoostHeader(const NamedDecl &OriginalName) const = 0; + + std::optional + getReplaceName(const NamedDecl &OriginalName) const final { + auto [Namespace, Function] = getBoostName(OriginalName); + return ("boost::" + Namespace + (Namespace.empty() ? "" : "::") + Function) + .str(); + } + + std::optional + getHeaderInclusion(const NamedDecl &OriginalName) const final { + auto [Path, HeaderName] = getBoostHeader(OriginalName); + return ((IncludeSystem ? 
"" : ".hpp")) + .str(); + } + +private: + SmallVector Signatures; + bool IncludeSystem; +}; + +/// Creates replaces where the header file lives in +/// `boost/algorithm/.hpp` and the function is named +/// `boost::range::` +class BoostRangeAlgorithmReplacer : public BoostReplacer { +public: + using BoostReplacer::BoostReplacer; + + std::pair + getBoostName(const NamedDecl &OriginalName) const override { + return {"range", OriginalName.getName()}; + } + + std::pair + getBoostHeader(const NamedDecl &OriginalName) const override { + return {"range/algorithm", OriginalName.getName()}; + } +}; + +/// Creates replaces where the header file lives in +/// `boost/algorithm/.hpp` and the function is named +/// `boost::range::` +class CustomBoostAlgorithmHeaderReplacer : public BoostRangeAlgorithmReplacer { +public: + CustomBoostAlgorithmHeaderReplacer( + StringRef HeaderName, ArrayRef Signatures, + bool IncludeSystem) + : BoostRangeAlgorithmReplacer(Signatures, IncludeSystem), + HeaderName(HeaderName) {} + + std::pair + getBoostHeader(const NamedDecl & /*OriginalName*/) const override { + return {"range/algorithm", HeaderName}; + } + +private: + StringRef HeaderName; +}; + +/// Creates replaces where the header file lives in +/// `boost/algorithm/.hpp` and the function is named +/// `boost::algorithm::` +class BoostAlgorithmReplacer : public BoostReplacer { +public: + BoostAlgorithmReplacer(StringRef SubHeader, + ArrayRef Signatures, + bool IncludeSystem) + : BoostReplacer(Signatures, IncludeSystem), + SubHeader(("algorithm/" + SubHeader).str()) {} + std::pair + getBoostName(const NamedDecl &OriginalName) const override { + return {"algorithm", OriginalName.getName()}; + } + + std::pair + getBoostHeader(const NamedDecl &OriginalName) const override { + return {SubHeader, OriginalName.getName()}; + } + +private: + std::string SubHeader; +}; + +/// Creates replaces where the header file lives in +/// `boost/algorithm//.hpp` and the function is named +/// `boost::algorithm::` 
+class CustomBoostAlgorithmReplacer : public BoostReplacer { +public: + CustomBoostAlgorithmReplacer(StringRef SubHeader, StringRef HeaderName, + ArrayRef Signatures, + bool IncludeSystem) + : BoostReplacer(Signatures, IncludeSystem), + SubHeader(("algorithm/" + SubHeader).str()), HeaderName(HeaderName) {} + std::pair + getBoostName(const NamedDecl &OriginalName) const override { + return {"algorithm", OriginalName.getName()}; + } + + std::pair + getBoostHeader(const NamedDecl & /*OriginalName*/) const override { + return {SubHeader, HeaderName}; + } + +private: + std::string SubHeader; + StringRef HeaderName; +}; + +/// A Replacer that is used for functions that just call a new overload +class MakeOverloadReplacer : public UseRangesCheck::Replacer { +public: + explicit MakeOverloadReplacer(ArrayRef Signatures) + : Signatures(Signatures) {} + + ArrayRef + getReplacementSignatures() const override { + return Signatures; + } + + std::optional + getReplaceName(const NamedDecl & /* OriginalName */) const override { + return std::nullopt; + } + + std::optional + getHeaderInclusion(const NamedDecl & /* OriginalName */) const override { + return std::nullopt; + } + +private: + SmallVector Signatures; +}; + +/// A replacer that replaces functions with an equivalent named function in the +/// root boost namespace +class FixedBoostReplace : public BoostReplacer { +public: + FixedBoostReplace(StringRef Header, + ArrayRef Signatures, + bool IncludeBoostSystem) + : BoostReplacer(Signatures, IncludeBoostSystem), Header(Header) {} + + std::pair + getBoostName(const NamedDecl &OriginalName) const override { + return {{}, OriginalName.getName()}; + } + + std::pair + getBoostHeader(const NamedDecl & /* OriginalName */) const override { + return {{}, Header}; + } + +private: + StringRef Header; +}; + +} // namespace + +utils::UseRangesCheck::ReplacerMap UseRangesCheck::getReplacerMap() const { + + ReplacerMap Results; + static const Signature SingleSig = {{0}}; + static const 
Signature TwoSig = {{0}, {2}}; + static const auto AddFrom = + [&Results](llvm::IntrusiveRefCntPtr Replacer, + std::initializer_list Names, StringRef Prefix) { + llvm::SmallString<64> Buffer; + for (const auto &Name : Names) { + Buffer.assign({"::", Prefix, (Prefix.empty() ? "" : "::"), Name}); + Results.try_emplace(Buffer, Replacer); + } + }; + + static const auto AddFromStd = + [](llvm::IntrusiveRefCntPtr Replacer, + std::initializer_list Names) { + AddFrom(Replacer, Names, "std"); + }; + + static const auto AddFromBoost = + [](llvm::IntrusiveRefCntPtr Replacer, + std::initializer_list< + std::pair>> + NamespaceAndNames) { + for (auto [Namespace, Names] : NamespaceAndNames) + AddFrom(Replacer, Names, + SmallString<64>{"boost", (Namespace.empty() ? "" : "::"), + Namespace}); + }; + + AddFromStd(llvm::makeIntrusiveRefCnt( + "set_algorithm", TwoSig, IncludeBoostSystem), + {"includes", "set_union", "set_intersection", "set_difference", + "set_symmetric_difference"}); + + AddFromStd(llvm::makeIntrusiveRefCnt( + SingleSig, IncludeBoostSystem), + {"unique", "lower_bound", "stable_sort", + "equal_range", "remove_if", "sort", + "random_shuffle", "remove_copy", "stable_partition", + "remove_copy_if", "count", "copy_backward", + "reverse_copy", "adjacent_find", "remove", + "upper_bound", "binary_search", "replace_copy_if", + "for_each", "generate", "count_if", + "min_element", "reverse", "replace_copy", + "fill", "unique_copy", "transform", + "copy", "replace", "find", + "replace_if", "find_if", "partition", + "max_element"}); + + AddFromStd(llvm::makeIntrusiveRefCnt( + TwoSig, IncludeBoostSystem), + {"find_end", "merge", "partial_sort_copy", "find_first_of", + "search", "lexicographical_compare", "equal", "mismatch"}); + + AddFromStd(llvm::makeIntrusiveRefCnt( + "permutation", SingleSig, IncludeBoostSystem), + {"next_permutation", "prev_permutation"}); + + AddFromStd(llvm::makeIntrusiveRefCnt( + "heap_algorithm", SingleSig, IncludeBoostSystem), + {"push_heap", "pop_heap", 
"make_heap", "sort_heap"}); + + AddFromStd(llvm::makeIntrusiveRefCnt( + "cxx11", SingleSig, IncludeBoostSystem), + {"copy_if", "is_permutation", "is_partitioned", "find_if_not", + "partition_copy", "any_of", "iota", "all_of", "partition_point", + "is_sorted", "none_of"}); + + AddFromStd(llvm::makeIntrusiveRefCnt( + "cxx11", "is_sorted", SingleSig, IncludeBoostSystem), + {"is_sorted_until"}); + + AddFromStd(llvm::makeIntrusiveRefCnt( + "range/numeric", SingleSig, IncludeBoostSystem), + {"accumulate", "partial_sum", "adjacent_difference"}); + + if (getLangOpts().CPlusPlus17) + AddFromStd(llvm::makeIntrusiveRefCnt( + "cxx17", SingleSig, IncludeBoostSystem), + {"reduce"}); + + AddFromBoost(llvm::makeIntrusiveRefCnt(SingleSig), + {{"algorithm", + {"reduce", + "find_backward", + "find_not_backward", + "find_if_backward", + "find_if_not_backward", + "hex", + "hex_lower", + "unhex", + "is_partitioned_until", + "is_palindrome", + "copy_if", + "copy_while", + "copy_until", + "copy_if_while", + "copy_if_until", + "is_permutation", + "is_partitioned", + "one_of", + "one_of_equal", + "find_if_not", + "partition_copy", + "any_of", + "any_of_equal", + "iota", + "all_of", + "all_of_equal", + "partition_point", + "is_sorted_until", + "is_sorted", + "is_increasing", + "is_decreasing", + "is_strictly_increasing", + "is_strictly_decreasing", + "none_of", + "none_of_equal", + "clamp_range"}}}); + + AddFromBoost( + llvm::makeIntrusiveRefCnt(TwoSig), + {{"algorithm", {"apply_permutation", "apply_reverse_permutation"}}}); + + return Results; +} + +UseRangesCheck::UseRangesCheck(StringRef Name, ClangTidyContext *Context) + : utils::UseRangesCheck(Name, Context), + IncludeBoostSystem(Options.get("IncludeBoostSystem", true)) {} + +void UseRangesCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { + utils::UseRangesCheck::storeOptions(Opts); + Options.store(Opts, "IncludeBoostSystem", IncludeBoostSystem); +} +DiagnosticBuilder UseRangesCheck::createDiag(const CallExpr &Call) { + 
DiagnosticBuilder D = + diag(Call.getBeginLoc(), "use a %0 version of this algorithm"); + D << (Call.getDirectCallee()->isInStdNamespace() ? "boost" : "ranged"); + return D; +} +ArrayRef> +UseRangesCheck::getFreeBeginEndMethods() const { + static const std::pair Refs[] = { + {"::std::begin", "::std::end"}, + {"::std::cbegin", "::std::cend"}, + {"::boost::range_adl_barrier::begin", "::boost::range_adl_barrier::end"}, + {"::boost::range_adl_barrier::const_begin", + "::boost::range_adl_barrier::const_end"}, + }; + return Refs; +} +std::optional +UseRangesCheck::getReverseDescriptor() const { + static const std::pair Refs[] = { + {"::std::rbegin", "::std::rend"}, + {"::std::crbegin", "::std::crend"}, + {"::boost::rbegin", "::boost::rend"}, + {"::boost::const_rbegin", "::boost::const_rend"}, + }; + return ReverseIteratorDescriptor{"boost::adaptors::reverse", + IncludeBoostSystem + ? "" + : "boost/range/adaptor/reversed.hpp", + Refs}; +} +} // namespace clang::tidy::boost diff --git a/clang-tools-extra/clang-tidy/boost/UseRangesCheck.h b/clang-tools-extra/clang-tidy/boost/UseRangesCheck.h new file mode 100644 index 00000000000000..a59ced12a6c438 --- /dev/null +++ b/clang-tools-extra/clang-tidy/boost/UseRangesCheck.h @@ -0,0 +1,43 @@ +//===--- UseRangesCheck.h - clang-tidy --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BOOST_USERANGESCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BOOST_USERANGESCHECK_H + +#include "../utils/UseRangesCheck.h" + +namespace clang::tidy::boost { + +/// Detects calls to standard library iterator algorithms that could be +/// replaced with a boost ranges version instead +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/boost/use-ranges.html +class UseRangesCheck : public utils::UseRangesCheck { +public: + UseRangesCheck(StringRef Name, ClangTidyContext *Context); + + void storeOptions(ClangTidyOptions::OptionMap &Options) override; + + ReplacerMap getReplacerMap() const override; + + DiagnosticBuilder createDiag(const CallExpr &Call) override; + + ArrayRef> + getFreeBeginEndMethods() const override; + + std::optional + getReverseDescriptor() const override; + +private: + bool IncludeBoostSystem; +}; + +} // namespace clang::tidy::boost + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BOOST_USERANGESCHECK_H diff --git a/clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.cpp index b91ad0f1822955..c90c92b5f660a6 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.cpp @@ -11,9 +11,11 @@ #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Analysis/Analyses/CFGReachabilityAnalysis.h" #include "clang/Analysis/CFG.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "../utils/ExprSequence.h" #include "../utils/Matchers.h" @@ -34,7 +36,12 @@ struct UseAfterMove { const DeclRefExpr *DeclRef; // Is the order in which the move and the use are evaluated 
undefined? - bool EvaluationOrderUndefined; + bool EvaluationOrderUndefined = false; + + // Does the use happen in a later loop iteration than the move? + // + // We default to false and change it to true if required in find(). + bool UseHappensInLaterLoopIteration = false; }; /// Finds uses of a variable after a move (and maintains state required by the @@ -48,7 +55,7 @@ class UseAfterMoveFinder { // use-after-move is found, writes information about it to 'TheUseAfterMove'. // Returns whether a use-after-move was found. bool find(Stmt *CodeBlock, const Expr *MovingCall, - const ValueDecl *MovedVariable, UseAfterMove *TheUseAfterMove); + const DeclRefExpr *MovedVariable, UseAfterMove *TheUseAfterMove); private: bool findInternal(const CFGBlock *Block, const Expr *MovingCall, @@ -89,7 +96,7 @@ UseAfterMoveFinder::UseAfterMoveFinder(ASTContext *TheContext) : Context(TheContext) {} bool UseAfterMoveFinder::find(Stmt *CodeBlock, const Expr *MovingCall, - const ValueDecl *MovedVariable, + const DeclRefExpr *MovedVariable, UseAfterMove *TheUseAfterMove) { // Generate the CFG manually instead of through an AnalysisDeclContext because // it seems the latter can't be used to generate a CFG for the body of a @@ -110,15 +117,32 @@ bool UseAfterMoveFinder::find(Stmt *CodeBlock, const Expr *MovingCall, BlockMap = std::make_unique(TheCFG.get(), Context); Visited.clear(); - const CFGBlock *Block = BlockMap->blockContainingStmt(MovingCall); - if (!Block) { + const CFGBlock *MoveBlock = BlockMap->blockContainingStmt(MovingCall); + if (!MoveBlock) { // This can happen if MovingCall is in a constructor initializer, which is // not included in the CFG because the CFG is built only from the function // body. 
- Block = &TheCFG->getEntry(); + MoveBlock = &TheCFG->getEntry(); } - return findInternal(Block, MovingCall, MovedVariable, TheUseAfterMove); + bool Found = findInternal(MoveBlock, MovingCall, MovedVariable->getDecl(), + TheUseAfterMove); + + if (Found) { + if (const CFGBlock *UseBlock = + BlockMap->blockContainingStmt(TheUseAfterMove->DeclRef)) { + // Does the use happen in a later loop iteration than the move? + // - If they are in the same CFG block, we know the use happened in a + // later iteration if we visited that block a second time. + // - Otherwise, we know the use happened in a later iteration if the + // move is reachable from the use. + CFGReverseBlockReachabilityAnalysis CFA(*TheCFG); + TheUseAfterMove->UseHappensInLaterLoopIteration = + UseBlock == MoveBlock ? Visited.contains(UseBlock) + : CFA.isReachable(UseBlock, MoveBlock); + } + } + return Found; } bool UseAfterMoveFinder::findInternal(const CFGBlock *Block, @@ -394,7 +418,7 @@ static void emitDiagnostic(const Expr *MovingCall, const DeclRefExpr *MoveArg, "there is no guarantee about the order in which they are evaluated", DiagnosticIDs::Note) << IsMove; - } else if (UseLoc < MoveLoc || Use.DeclRef == MoveArg) { + } else if (Use.UseHappensInLaterLoopIteration) { Check->diag(UseLoc, "the use happens in a later loop iteration than the " "%select{forward|move}0", @@ -495,7 +519,7 @@ void UseAfterMoveCheck::check(const MatchFinder::MatchResult &Result) { for (Stmt *CodeBlock : CodeBlocks) { UseAfterMoveFinder Finder(Result.Context); UseAfterMove Use; - if (Finder.find(CodeBlock, MovingCall, Arg->getDecl(), &Use)) + if (Finder.find(CodeBlock, MovingCall, Arg, &Use)) emitDiagnostic(MovingCall, Arg, Use, this, Result.Context, determineMoveType(MoveDecl)); } diff --git a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt index 576805c4c7f181..4f68c487cac9d4 100644 --- a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt +++ 
b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt @@ -40,6 +40,7 @@ add_clang_library(clangTidyModernizeModule UseNoexceptCheck.cpp UseNullptrCheck.cpp UseOverrideCheck.cpp + UseRangesCheck.cpp UseStartsEndsWithCheck.cpp UseStdFormatCheck.cpp UseStdNumbersCheck.cpp diff --git a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp index b9c7a2dc383e88..18607593320635 100644 --- a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp @@ -41,6 +41,7 @@ #include "UseNoexceptCheck.h" #include "UseNullptrCheck.h" #include "UseOverrideCheck.h" +#include "UseRangesCheck.h" #include "UseStartsEndsWithCheck.h" #include "UseStdFormatCheck.h" #include "UseStdNumbersCheck.h" @@ -75,6 +76,7 @@ class ModernizeModule : public ClangTidyModule { CheckFactories.registerCheck("modernize-pass-by-value"); CheckFactories.registerCheck( "modernize-use-designated-initializers"); + CheckFactories.registerCheck("modernize-use-ranges"); CheckFactories.registerCheck( "modernize-use-starts-ends-with"); CheckFactories.registerCheck("modernize-use-std-format"); diff --git a/clang-tools-extra/clang-tidy/modernize/UseRangesCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseRangesCheck.cpp new file mode 100644 index 00000000000000..5c7b315f43173b --- /dev/null +++ b/clang-tools-extra/clang-tidy/modernize/UseRangesCheck.cpp @@ -0,0 +1,185 @@ +//===--- UseRangesCheck.cpp - clang-tidy ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "UseRangesCheck.h" +#include "clang/AST/Decl.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include + +// FixItHint - Let the docs script know that this class does provide fixits + +namespace clang::tidy::modernize { + +static constexpr const char *SingleRangeNames[] = { + "all_of", + "any_of", + "none_of", + "for_each", + "find", + "find_if", + "find_if_not", + "adjacent_find", + "copy", + "copy_if", + "copy_backward", + "move", + "move_backward", + "fill", + "transform", + "replace", + "replace_if", + "generate", + "remove", + "remove_if", + "remove_copy", + "remove_copy_if", + "unique", + "unique_copy", + "sample", + "partition_point", + "lower_bound", + "upper_bound", + "equal_range", + "binary_search", + "push_heap", + "pop_heap", + "make_heap", + "sort_heap", + "next_permutation", + "prev_permutation", + "reverse", + "reverse_copy", + "shift_left", + "shift_right", + "is_partitioned", + "partition", + "partition_copy", + "stable_partition", + "sort", + "stable_sort", + "is_sorted", + "is_sorted_until", + "is_heap", + "is_heap_until", + "max_element", + "min_element", + "minmax_element", + "uninitialized_copy", + "uninitialized_fill", + "uninitialized_move", + "uninitialized_default_construct", + "uninitialized_value_construct", + "destroy", +}; + +static constexpr const char *TwoRangeNames[] = { + "equal", + "mismatch", + "partial_sort_copy", + "includes", + "set_union", + "set_intersection", + "set_difference", + "set_symmetric_difference", + "merge", + "lexicographical_compare", + "find_end", + "search", + "is_permutation", +}; + +namespace { +class StdReplacer : public utils::UseRangesCheck::Replacer { +public: + explicit StdReplacer(SmallVector Signatures) + : Signatures(std::move(Signatures)) {} + 
std::optional + getReplaceName(const NamedDecl &OriginalName) const override { + return ("std::ranges::" + OriginalName.getName()).str(); + } + ArrayRef + getReplacementSignatures() const override { + return Signatures; + } + +private: + SmallVector Signatures; +}; + +class StdAlgorithmReplacer : public StdReplacer { + using StdReplacer::StdReplacer; + std::optional + getHeaderInclusion(const NamedDecl & /*OriginalName*/) const override { + return ""; + } +}; + +class StdNumericReplacer : public StdReplacer { + using StdReplacer::StdReplacer; + std::optional + getHeaderInclusion(const NamedDecl & /*OriginalName*/) const override { + return ""; + } +}; +} // namespace + +utils::UseRangesCheck::ReplacerMap UseRangesCheck::getReplacerMap() const { + + utils::UseRangesCheck::ReplacerMap Result; + + // template Func(Iter first, Iter last,...). + static const Signature SingleRangeArgs = {{0}}; + // template + // Func(Iter1 first1, Iter1 last1, Iter2 first2, Iter2 last2,...). + static const Signature TwoRangeArgs = {{0}, {2}}; + + static const Signature SingleRangeFunc[] = {SingleRangeArgs}; + + static const Signature TwoRangeFunc[] = {TwoRangeArgs}; + + static const std::pair, ArrayRef> + AlgorithmNames[] = {{SingleRangeFunc, SingleRangeNames}, + {TwoRangeFunc, TwoRangeNames}}; + SmallString<64> Buff; + for (const auto &[Signatures, Values] : AlgorithmNames) { + auto Replacer = llvm::makeIntrusiveRefCnt( + SmallVector{Signatures}); + for (const auto &Name : Values) { + Buff.assign({"::std::", Name}); + Result.try_emplace(Buff, Replacer); + } + } + if (getLangOpts().CPlusPlus23) + Result.try_emplace( + "::std::iota", + llvm::makeIntrusiveRefCnt( + SmallVector{std::begin(SingleRangeFunc), + std::end(SingleRangeFunc)})); + return Result; +} + +bool UseRangesCheck::isLanguageVersionSupported( + const LangOptions &LangOpts) const { + return LangOpts.CPlusPlus20; +} +ArrayRef> +UseRangesCheck::getFreeBeginEndMethods() const { + static const std::pair Refs[] = { + 
{"::std::begin", "::std::end"}, {"::std::cbegin", "::std::cend"}}; + return Refs; +} +std::optional +UseRangesCheck::getReverseDescriptor() const { + static const std::pair Refs[] = { + {"::std::rbegin", "::std::rend"}, {"::std::crbegin", "::std::crend"}}; + return ReverseIteratorDescriptor{"std::views::reverse", "", Refs}; +} +} // namespace clang::tidy::modernize diff --git a/clang-tools-extra/clang-tidy/modernize/UseRangesCheck.h b/clang-tools-extra/clang-tidy/modernize/UseRangesCheck.h new file mode 100644 index 00000000000000..2f7613dd1cd246 --- /dev/null +++ b/clang-tools-extra/clang-tidy/modernize/UseRangesCheck.h @@ -0,0 +1,38 @@ +//===--- UseRangesCheck.h - clang-tidy --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USERANGESCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USERANGESCHECK_H + +#include "../utils/UseRangesCheck.h" + +namespace clang::tidy::modernize { + +/// Detects calls to standard library iterator algorithms that could be +/// replaced with a ranges version instead +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/modernize/use-ranges.html +class UseRangesCheck : public utils::UseRangesCheck { +public: + using utils::UseRangesCheck::UseRangesCheck; + + ReplacerMap getReplacerMap() const override; + + ArrayRef> + getFreeBeginEndMethods() const override; + + std::optional + getReverseDescriptor() const override; + + bool isLanguageVersionSupported(const LangOptions &LangOpts) const override; +}; + +} // namespace clang::tidy::modernize + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USERANGESCHECK_H diff --git 
a/clang-tools-extra/clang-tidy/utils/CMakeLists.txt b/clang-tools-extra/clang-tidy/utils/CMakeLists.txt index 9cff7d475425d7..504c6e928bdad0 100644 --- a/clang-tools-extra/clang-tidy/utils/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/utils/CMakeLists.txt @@ -25,6 +25,7 @@ add_clang_library(clangTidyUtils RenamerClangTidyCheck.cpp TransformerClangTidyCheck.cpp TypeTraits.cpp + UseRangesCheck.cpp UsingInserter.cpp LINK_LIBS diff --git a/clang-tools-extra/clang-tidy/utils/ExprSequence.cpp b/clang-tools-extra/clang-tidy/utils/ExprSequence.cpp index 50df451ecfa268..145a5fe378b3e2 100644 --- a/clang-tools-extra/clang-tidy/utils/ExprSequence.cpp +++ b/clang-tools-extra/clang-tidy/utils/ExprSequence.cpp @@ -55,12 +55,18 @@ bool isDescendantOrEqual(const Stmt *Descendant, const Stmt *Ancestor, ASTContext *Context) { if (Descendant == Ancestor) return true; - for (const Stmt *Parent : getParentStmts(Descendant, Context)) { - if (isDescendantOrEqual(Parent, Ancestor, Context)) - return true; - } + return llvm::any_of(getParentStmts(Descendant, Context), + [Ancestor, Context](const Stmt *Parent) { + return isDescendantOrEqual(Parent, Ancestor, Context); + }); +} - return false; +bool isDescendantOfArgs(const Stmt *Descendant, const CallExpr *Call, + ASTContext *Context) { + return llvm::any_of(Call->arguments(), + [Descendant, Context](const Expr *Arg) { + return isDescendantOrEqual(Descendant, Arg, Context); + }); } llvm::SmallVector @@ -95,9 +101,59 @@ bool ExprSequence::inSequence(const Stmt *Before, const Stmt *After) const { return true; } + SmallVector BeforeParents = getParentStmts(Before, Context); + + // Since C++17, the callee of a call expression is guaranteed to be sequenced + // before all of the arguments. + // We handle this as a special case rather than using the general + // `getSequenceSuccessor` logic above because the callee expression doesn't + // have an unambiguous successor; the order in which arguments are evaluated + // is indeterminate. 
+ for (const Stmt *Parent : BeforeParents) { + // Special case: If the callee is a `MemberExpr` with a `DeclRefExpr` as its + // base, we consider it to be sequenced _after_ the arguments. This is + // because the variable referenced in the base will only actually be + // accessed when the call happens, i.e. once all of the arguments have been + // evaluated. This has no basis in the C++ standard, but it reflects actual + // behavior that is relevant to a use-after-move scenario: + // + // ``` + // a.bar(consumeA(std::move(a)); + // ``` + // + // In this example, we end up accessing `a` after it has been moved from, + // even though nominally the callee `a.bar` is evaluated before the argument + // `consumeA(std::move(a))`. Note that this is not specific to C++17, so + // we implement this logic unconditionally. + if (const auto *Call = dyn_cast(Parent)) { + if (is_contained(Call->arguments(), Before) && + isa( + Call->getImplicitObjectArgument()->IgnoreParenImpCasts()) && + isDescendantOrEqual(After, Call->getImplicitObjectArgument(), + Context)) + return true; + + // We need this additional early exit so that we don't fall through to the + // more general logic below. + if (const auto *Member = dyn_cast(Before); + Member && Call->getCallee() == Member && + isa(Member->getBase()->IgnoreParenImpCasts()) && + isDescendantOfArgs(After, Call, Context)) + return false; + } + + if (!Context->getLangOpts().CPlusPlus17) + continue; + + if (const auto *Call = dyn_cast(Parent); + Call && Call->getCallee() == Before && + isDescendantOfArgs(After, Call, Context)) + return true; + } + // If 'After' is a parent of 'Before' or is sequenced after one of these // parents, we know that it is sequenced after 'Before'. 
- for (const Stmt *Parent : getParentStmts(Before, Context)) { + for (const Stmt *Parent : BeforeParents) { if (Parent == After || inSequence(Parent, After)) return true; } diff --git a/clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp b/clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp new file mode 100644 index 00000000000000..9c59e4651953ac --- /dev/null +++ b/clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp @@ -0,0 +1,306 @@ +//===--- UseRangesCheck.cpp - clang-tidy ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "UseRangesCheck.h" +#include "Matchers.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/ASTMatchers/ASTMatchersInternal.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/Lexer.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include + +using namespace clang::ast_matchers; + +static constexpr const char BoundCall[] = "CallExpr"; +static constexpr const char FuncDecl[] = "FuncDecl"; +static constexpr const char ArgName[] = "ArgName"; + +namespace clang::tidy::utils { + +static bool operator==(const UseRangesCheck::Indexes &L, + const UseRangesCheck::Indexes &R) { + return std::tie(L.BeginArg, L.EndArg, L.ReplaceArg) == + 
std::tie(R.BeginArg, R.EndArg, R.ReplaceArg); +} + +static std::string getFullPrefix(ArrayRef Signature) { + std::string Output; + llvm::raw_string_ostream OS(Output); + for (const UseRangesCheck::Indexes &Item : Signature) + OS << Item.BeginArg << ":" << Item.EndArg << ":" + << (Item.ReplaceArg == Item.First ? '0' : '1'); + return Output; +} + +static llvm::hash_code hash_value(const UseRangesCheck::Indexes &Indexes) { + return llvm::hash_combine(Indexes.BeginArg, Indexes.EndArg, + Indexes.ReplaceArg); +} + +static llvm::hash_code hash_value(const UseRangesCheck::Signature &Sig) { + return llvm::hash_combine_range(Sig.begin(), Sig.end()); +} + +namespace { + +AST_MATCHER(Expr, hasSideEffects) { + return Node.HasSideEffects(Finder->getASTContext()); +} +} // namespace + +static auto +makeExprMatcher(ast_matchers::internal::Matcher ArgumentMatcher, + ArrayRef MethodNames, + ArrayRef FreeNames) { + return expr( + anyOf(cxxMemberCallExpr(argumentCountIs(0), + callee(cxxMethodDecl(hasAnyName(MethodNames))), + on(ArgumentMatcher)), + callExpr(argumentCountIs(1), hasArgument(0, ArgumentMatcher), + hasDeclaration(functionDecl(hasAnyName(FreeNames)))))); +} + +static ast_matchers::internal::Matcher +makeMatcherPair(StringRef State, const UseRangesCheck::Indexes &Indexes, + ArrayRef BeginFreeNames, + ArrayRef EndFreeNames, + const std::optional + &ReverseDescriptor) { + std::string ArgBound = (ArgName + llvm::Twine(Indexes.BeginArg)).str(); + SmallString<64> ID = {BoundCall, State}; + ast_matchers::internal::Matcher ArgumentMatcher = allOf( + hasArgument(Indexes.BeginArg, + makeExprMatcher(expr(unless(hasSideEffects())).bind(ArgBound), + {"begin", "cbegin"}, BeginFreeNames)), + hasArgument(Indexes.EndArg, + makeExprMatcher( + expr(matchers::isStatementIdenticalToBoundNode(ArgBound)), + {"end", "cend"}, EndFreeNames))); + if (ReverseDescriptor) { + ArgBound.push_back('R'); + SmallVector RBegin{ + llvm::make_first_range(ReverseDescriptor->FreeReverseNames)}; + SmallVector 
REnd{ + llvm::make_second_range(ReverseDescriptor->FreeReverseNames)}; + ArgumentMatcher = anyOf( + ArgumentMatcher, + allOf(hasArgument( + Indexes.BeginArg, + makeExprMatcher(expr(unless(hasSideEffects())).bind(ArgBound), + {"rbegin", "crbegin"}, RBegin)), + hasArgument( + Indexes.EndArg, + makeExprMatcher( + expr(matchers::isStatementIdenticalToBoundNode(ArgBound)), + {"rend", "crend"}, REnd)))); + } + return callExpr(argumentCountAtLeast( + std::max(Indexes.BeginArg, Indexes.EndArg) + 1), + ArgumentMatcher) + .bind(ID); +} + +void UseRangesCheck::registerMatchers(MatchFinder *Finder) { + Replaces = getReplacerMap(); + ReverseDescriptor = getReverseDescriptor(); + auto BeginEndNames = getFreeBeginEndMethods(); + llvm::SmallVector BeginNames{ + llvm::make_first_range(BeginEndNames)}; + llvm::SmallVector EndNames{ + llvm::make_second_range(BeginEndNames)}; + llvm::DenseSet> Seen; + for (auto I = Replaces.begin(), E = Replaces.end(); I != E; ++I) { + const ArrayRef &Signatures = + I->getValue()->getReplacementSignatures(); + if (!Seen.insert(Signatures).second) + continue; + assert(!Signatures.empty() && + llvm::all_of(Signatures, [](auto Index) { return !Index.empty(); })); + std::vector Names(1, I->getKey()); + for (auto J = std::next(I); J != E; ++J) + if (J->getValue()->getReplacementSignatures() == Signatures) + Names.push_back(J->getKey()); + + std::vector TotalMatchers; + // As we match on the first matched signature, we need to sort the + // signatures in order of length(longest to shortest). This way any + // signature that is a subset of another signature will be matched after the + // other. 
+ SmallVector SigVec(Signatures); + llvm::sort(SigVec, [](auto &L, auto &R) { return R.size() < L.size(); }); + for (const auto &Signature : SigVec) { + std::vector Matchers; + for (const auto &ArgPair : Signature) + Matchers.push_back(makeMatcherPair(getFullPrefix(Signature), ArgPair, + BeginNames, EndNames, + ReverseDescriptor)); + TotalMatchers.push_back( + ast_matchers::internal::DynTypedMatcher::constructVariadic( + ast_matchers::internal::DynTypedMatcher::VO_AllOf, + ASTNodeKind::getFromNodeKind(), std::move(Matchers))); + } + Finder->addMatcher( + callExpr( + callee(functionDecl(hasAnyName(std::move(Names))).bind(FuncDecl)), + ast_matchers::internal::DynTypedMatcher::constructVariadic( + ast_matchers::internal::DynTypedMatcher::VO_AnyOf, + ASTNodeKind::getFromNodeKind(), + std::move(TotalMatchers)) + .convertTo()), + this); + } +} + +static void removeFunctionArgs(DiagnosticBuilder &Diag, const CallExpr &Call, + ArrayRef Indexes, + const ASTContext &Ctx) { + llvm::SmallVector Sorted(Indexes); + llvm::sort(Sorted); + // Keep track of commas removed + llvm::SmallBitVector Commas(Call.getNumArgs()); + // The first comma is actually the '(' which we can't remove + Commas[0] = true; + for (unsigned Index : Sorted) { + const Expr *Arg = Call.getArg(Index); + if (Commas[Index]) { + if (Index >= Commas.size()) { + Diag << FixItHint::CreateRemoval(Arg->getSourceRange()); + } else { + // Remove the next comma + Commas[Index + 1] = true; + Diag << FixItHint::CreateRemoval(CharSourceRange::getTokenRange( + {Arg->getBeginLoc(), + Lexer::getLocForEndOfToken( + Arg->getEndLoc(), 0, Ctx.getSourceManager(), Ctx.getLangOpts()) + .getLocWithOffset(1)})); + } + } else { + Diag << FixItHint::CreateRemoval(CharSourceRange::getTokenRange( + Arg->getBeginLoc().getLocWithOffset(-1), Arg->getEndLoc())); + Commas[Index] = true; + } + } +} + +void UseRangesCheck::check(const MatchFinder::MatchResult &Result) { + const auto *Function = Result.Nodes.getNodeAs(FuncDecl); + std::string 
Qualified = "::" + Function->getQualifiedNameAsString(); + auto Iter = Replaces.find(Qualified); + assert(Iter != Replaces.end()); + SmallString<64> Buffer; + for (const Signature &Sig : Iter->getValue()->getReplacementSignatures()) { + Buffer.assign({BoundCall, getFullPrefix(Sig)}); + const auto *Call = Result.Nodes.getNodeAs(Buffer); + if (!Call) + continue; + auto Diag = createDiag(*Call); + if (auto ReplaceName = Iter->getValue()->getReplaceName(*Function)) + Diag << FixItHint::CreateReplacement(Call->getCallee()->getSourceRange(), + *ReplaceName); + if (auto Include = Iter->getValue()->getHeaderInclusion(*Function)) + Diag << Inserter.createIncludeInsertion( + Result.SourceManager->getFileID(Call->getBeginLoc()), *Include); + llvm::SmallVector ToRemove; + for (const auto &[First, Second, Replace] : Sig) { + auto ArgNode = ArgName + std::to_string(First); + if (const auto *ArgExpr = Result.Nodes.getNodeAs(ArgNode)) { + Diag << FixItHint::CreateReplacement( + Call->getArg(Replace == Indexes::Second ? Second : First) + ->getSourceRange(), + Lexer::getSourceText( + CharSourceRange::getTokenRange(ArgExpr->getSourceRange()), + Result.Context->getSourceManager(), + Result.Context->getLangOpts())); + } else { + assert(ReverseDescriptor && "Couldn't find forward argument"); + ArgNode.push_back('R'); + ArgExpr = Result.Nodes.getNodeAs(ArgNode); + assert(ArgExpr && "Couldn't find forward or reverse argument"); + if (ReverseDescriptor->ReverseHeader) + Diag << Inserter.createIncludeInsertion( + Result.SourceManager->getFileID(Call->getBeginLoc()), + *ReverseDescriptor->ReverseHeader); + Diag << FixItHint::CreateReplacement( + Call->getArg(Replace == Indexes::Second ? 
Second : First) + ->getSourceRange(), + SmallString<128>{ + ReverseDescriptor->ReverseAdaptorName, "(", + Lexer::getSourceText( + CharSourceRange::getTokenRange(ArgExpr->getSourceRange()), + Result.Context->getSourceManager(), + Result.Context->getLangOpts()), + ")"}); + } + ToRemove.push_back(Replace == Indexes::Second ? First : Second); + } + removeFunctionArgs(Diag, *Call, ToRemove, *Result.Context); + return; + } + llvm_unreachable("No valid signature found"); +} + +bool UseRangesCheck::isLanguageVersionSupported( + const LangOptions &LangOpts) const { + return LangOpts.CPlusPlus11; +} + +UseRangesCheck::UseRangesCheck(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + Inserter(Options.getLocalOrGlobal("IncludeStyle", + utils::IncludeSorter::IS_LLVM), + areDiagsSelfContained()) {} + +void UseRangesCheck::registerPPCallbacks(const SourceManager &, + Preprocessor *PP, Preprocessor *) { + Inserter.registerPreprocessor(PP); +} + +void UseRangesCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "IncludeStyle", Inserter.getStyle()); +} + +std::optional +UseRangesCheck::Replacer::getHeaderInclusion(const NamedDecl &) const { + return std::nullopt; +} + +DiagnosticBuilder UseRangesCheck::createDiag(const CallExpr &Call) { + return diag(Call.getBeginLoc(), "use a ranges version of this algorithm"); +} + +std::optional +UseRangesCheck::getReverseDescriptor() const { + return std::nullopt; +} + +ArrayRef> +UseRangesCheck::getFreeBeginEndMethods() const { + return {}; +} + +std::optional UseRangesCheck::getCheckTraversalKind() const { + return TK_IgnoreUnlessSpelledInSource; +} +} // namespace clang::tidy::utils diff --git a/clang-tools-extra/clang-tidy/utils/UseRangesCheck.h b/clang-tools-extra/clang-tidy/utils/UseRangesCheck.h new file mode 100644 index 00000000000000..8227d8f7bbbddf --- /dev/null +++ b/clang-tools-extra/clang-tidy/utils/UseRangesCheck.h @@ -0,0 +1,94 @@ +//===--- UseRangesCheck.h - clang-tidy 
--------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_USERANGESCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_USERANGESCHECK_H + +#include "../ClangTidyCheck.h" +#include "IncludeInserter.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/Basic/Diagnostic.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include + +namespace clang::tidy::utils { + +/// Base class for handling converting std iterator algorithms to a range +/// equivalent. +class UseRangesCheck : public ClangTidyCheck { +public: + struct Indexes { + enum Replace { First, Second }; + unsigned BeginArg; + unsigned EndArg = BeginArg + 1; + Replace ReplaceArg = First; + }; + + using Signature = SmallVector; + + struct ReverseIteratorDescriptor { + StringRef ReverseAdaptorName; + std::optional ReverseHeader; + ArrayRef> FreeReverseNames; + }; + + class Replacer : public llvm::RefCountedBase { + public: + /// Gets the name to replace a function with, return std::nullopt for a + /// replacement where we just call a different overload. + virtual std::optional + getReplaceName(const NamedDecl &OriginalName) const = 0; + + /// Gets the header needed to access the replaced function + /// Return std::nullopt if no new header is needed. + virtual std::optional + getHeaderInclusion(const NamedDecl &OriginalName) const; + + /// Gets an array of all the possible overloads for a function with indexes + /// where begin and end arguments are. 
+ virtual ArrayRef getReplacementSignatures() const = 0; + virtual ~Replacer() = default; + }; + + using ReplacerMap = llvm::StringMap>; + + UseRangesCheck(StringRef Name, ClangTidyContext *Context); + /// Gets a map of function to replace and methods to create the replacements + virtual ReplacerMap getReplacerMap() const = 0; + /// Create a diagnostic for the CallExpr + /// Override this to support custom diagnostic messages + virtual DiagnosticBuilder createDiag(const CallExpr &Call); + + virtual std::optional getReverseDescriptor() const; + + /// Gets the fully qualified names of begin and end functions. + /// The functions must take the container as their one and only argument + /// `::std::begin` and `::std::end` are a common example + virtual ArrayRef> + getFreeBeginEndMethods() const; + + void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP, + Preprocessor *ModuleExpanderPP) final; + void registerMatchers(ast_matchers::MatchFinder *Finder) final; + void check(const ast_matchers::MatchFinder::MatchResult &Result) final; + bool isLanguageVersionSupported(const LangOptions &LangOpts) const override; + void storeOptions(ClangTidyOptions::OptionMap &Options) override; + std::optional getCheckTraversalKind() const override; + +private: + ReplacerMap Replaces; + std::optional ReverseDescriptor; + IncludeInserter Inserter; +}; + +} // namespace clang::tidy::utils + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_UTILS_USERANGESCHECK_H diff --git a/clang-tools-extra/clangd/support/ThreadsafeFS.cpp b/clang-tools-extra/clangd/support/ThreadsafeFS.cpp index 0e249d07d2fd91..7398e4258527ba 100644 --- a/clang-tools-extra/clangd/support/ThreadsafeFS.cpp +++ b/clang-tools-extra/clangd/support/ThreadsafeFS.cpp @@ -41,7 +41,7 @@ class VolatileFileSystem : public llvm::vfs::ProxyFileSystem { llvm::StringRef FileName = llvm::sys::path::filename(Path); if (FileName.starts_with("preamble-") && FileName.ends_with(".pch")) return File; - return std::unique_ptr(new 
VolatileFile(std::move(*File))); + return std::make_unique(std::move(*File)); } private: diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index e570c8184f8b0a..bde096b9eebd9f 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -131,6 +131,12 @@ Improvements to clang-tidy New checks ^^^^^^^^^^ +- New :doc:`boost-use-ranges + ` check. + + Detects calls to standard library iterator algorithms that could be replaced + with a Boost ranges version instead. + - New :doc:`bugprone-crtp-constructor-accessibility ` check. @@ -174,6 +180,12 @@ New checks Finds initializer lists for aggregate types that could be written as designated initializers instead. +- New :doc:`modernize-use-ranges + ` check. + + Detects calls to standard library iterator algorithms that could be replaced + with a ranges version instead. + - New :doc:`modernize-use-std-format ` check. @@ -277,7 +289,10 @@ Changes in existing checks - Improved :doc:`bugprone-use-after-move ` check to also handle - calls to ``std::forward``. + calls to ``std::forward``. Fixed sequencing of designated initializers. Fixed + sequencing of callees: In C++17 and later, the callee of a function is guaranteed + to be sequenced before the arguments, so don't warn if the use happens in the + callee and the move happens in one of the arguments. - Improved :doc:`cppcoreguidelines-avoid-non-const-global-variables ` check diff --git a/clang-tools-extra/docs/clang-tidy/checks/boost/use-ranges.rst b/clang-tools-extra/docs/clang-tidy/checks/boost/use-ranges.rst new file mode 100644 index 00000000000000..b18a38e807f930 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/boost/use-ranges.rst @@ -0,0 +1,86 @@ +.. title:: clang-tidy - boost-use-ranges + +boost-use-ranges +================ + +Detects calls to standard library iterator algorithms that could be replaced +with a Boost ranges version instead. + +Example +------- + +.. 
code-block:: c++ + + auto Iter1 = std::find(Items.begin(), Items.end(), 0); + auto AreSame = std::equal(Items1.cbegin(), Items1.cend(), std::begin(Items2), + std::end(Items2)); + + +transforms to: + +.. code-block:: c++ + + auto Iter1 = boost::range::find(Items, 0); + auto AreSame = boost::range::equal(Items1, Items2); + +Calls to the following std library algorithms are checked: +``includes``,``set_union``,``set_intersection``,``set_difference``, +``set_symmetric_difference``,``unique``,``lower_bound``,``stable_sort``, +``equal_range``,``remove_if``,``sort``,``random_shuffle``,``remove_copy``, +``stable_partition``,``remove_copy_if``,``count``,``copy_backward``, +``reverse_copy``,``adjacent_find``,``remove``,``upper_bound``,``binary_search``, +``replace_copy_if``,``for_each``,``generate``,``count_if``,``min_element``, +``reverse``,``replace_copy``,``fill``,``unique_copy``,``transform``,``copy``, +``replace``,``find``,``replace_if``,``find_if``,``partition``,``max_element``, +``find_end``,``merge``,``partial_sort_copy``,``find_first_of``,``search``, +``lexicographical_compare``,``equal``,``mismatch``,``next_permutation``, +``prev_permutation``,``push_heap``,``pop_heap``,``make_heap``,``sort_heap``, +``copy_if``,``is_permutation``,``is_partitioned``,``find_if_not``, +``partition_copy``,``any_of``,``iota``,``all_of``,``partition_point``, +``is_sorted``,``none_of``,``is_sorted_until``,``reduce``,``accumulate``, +``partial_sum``,``adjacent_difference``. 
+ +The check will also look for the following functions from the +``boost::algorithm`` namespace: +``reduce``,``find_backward``,``find_not_backward``,``find_if_backward``, +``find_if_not_backward``,``hex``,``hex_lower``,``unhex``, +``is_partitioned_until``,``is_palindrome``,``copy_if``,``copy_while``, +``copy_until``,``copy_if_while``,``copy_if_until``,``is_permutation``, +``is_partitioned``,``one_of``,``one_of_equal``,``find_if_not``, +``partition_copy``,``any_of``,``any_of_equal``,``iota``,``all_of``, +``all_of_equal``,``partition_point``,``is_sorted_until``,``is_sorted``, +``is_increasing``,``is_decreasing``,``is_strictly_increasing``, +``is_strictly_decreasing``,``none_of``,``none_of_equal``,``clamp_range``, +``apply_permutation``,``apply_reverse_permutation``. + +Reverse Iteration +----------------- + +If calls are made using reverse iterators on containers, the code will be +fixed using the ``boost::adaptors::reverse`` adaptor. + +.. code-block:: c++ + + auto AreSame = std::equal(Items1.rbegin(), Items1.rend(), + std::crbegin(Items2), std::crend(Items2)); + +transforms to: + +.. code-block:: c++ + + auto AreSame = std::equal(boost::adaptors::reverse(Items1), + boost::adaptors::reverse(Items2)); + +Options +------- + +.. option:: IncludeStyle + + A string specifying which include-style is used, `llvm` or `google`. Default + is `llvm`. + +.. option:: IncludeBoostSystem + + If `true` (default value) the boost headers are included as system headers + with angle brackets (`#include `), otherwise quotes are used + (`#include "boost.hpp"`). 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 9671f3895f5d35..dd2887edb0f8da 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -75,6 +75,7 @@ Clang-Tidy Checks :doc:`android-cloexec-pipe2 `, "Yes" :doc:`android-cloexec-socket `, "Yes" :doc:`android-comparison-in-temp-failure-retry `, + :doc:`boost-use-ranges `, "Yes" :doc:`boost-use-to-string `, "Yes" :doc:`bugprone-argument-comment `, "Yes" :doc:`bugprone-assert-side-effect `, @@ -301,6 +302,7 @@ Clang-Tidy Checks :doc:`modernize-use-noexcept `, "Yes" :doc:`modernize-use-nullptr `, "Yes" :doc:`modernize-use-override `, "Yes" + :doc:`modernize-use-ranges `, "Yes" :doc:`modernize-use-starts-ends-with `, "Yes" :doc:`modernize-use-std-format `, "Yes" :doc:`modernize-use-std-numbers `, "Yes" diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-ranges.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-ranges.rst new file mode 100644 index 00000000000000..431157f7b0fb2f --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-ranges.rst @@ -0,0 +1,79 @@ +.. title:: clang-tidy - modernize-use-ranges + +modernize-use-ranges +==================== + +Detects calls to standard library iterator algorithms that could be replaced +with a ranges version instead. + +Example +------- + +.. code-block:: c++ + + auto Iter1 = std::find(Items.begin(), Items.end(), 0); + auto AreSame = std::equal(Items1.cbegin(), Items1.cend(), + std::begin(Items2), std::end(Items2)); + + +transforms to: + +.. 
code-block:: c++ + + auto Iter1 = std::ranges::find(Items, 0); + auto AreSame = std::ranges::equal(Items1, Items2); + +Calls to the following std library algorithms are checked: +``::std::all_of``,``::std::any_of``,``::std::none_of``,``::std::for_each``, +``::std::find``,``::std::find_if``,``::std::find_if_not``, +``::std::adjacent_find``,``::std::copy``,``::std::copy_if``, +``::std::copy_backward``,``::std::move``,``::std::move_backward``, +``::std::fill``,``::std::transform``,``::std::replace``,``::std::replace_if``, +``::std::generate``,``::std::remove``,``::std::remove_if``, +``::std::remove_copy``,``::std::remove_copy_if``,``::std::unique``, +``::std::unique_copy``,``::std::sample``,``::std::partition_point``, +``::std::lower_bound``,``::std::upper_bound``,``::std::equal_range``, +``::std::binary_search``,``::std::push_heap``,``::std::pop_heap``, +``::std::make_heap``,``::std::sort_heap``,``::std::next_permutation``, +``::std::prev_permutation``,``::std::iota``,``::std::reverse``, +``::std::reverse_copy``,``::std::shift_left``,``::std::shift_right``, +``::std::is_partitioned``,``::std::partition``,``::std::partition_copy``, +``::std::stable_partition``,``::std::sort``,``::std::stable_sort``, +``::std::is_sorted``,``::std::is_sorted_until``,``::std::is_heap``, +``::std::is_heap_until``,``::std::max_element``,``::std::min_element``, +``::std::minmax_element``,``::std::uninitialized_copy``, +``::std::uninitialized_fill``,``::std::uninitialized_move``, +``::std::uninitialized_default_construct``, +``::std::uninitialized_value_construct``,``::std::destroy``, +``::std::partial_sort_copy``,``::std::includes``, +``::std::set_union``,``::std::set_intersection``,``::std::set_difference``, +``::std::set_symmetric_difference``,``::std::merge``, +``::std::lexicographical_compare``,``::std::find_end``,``::std::search``, +``::std::is_permutation``,``::std::equal``,``::std::mismatch``. 
+ +Reverse Iteration +----------------- + +If calls are made using reverse iterators on containers, the code will be +fixed using the ``std::views::reverse`` adaptor. + +.. code-block:: c++ + + auto AreSame = std::equal(Items1.rbegin(), Items1.rend(), + std::crbegin(Items2), std::crend(Items2)); + +transforms to: + +.. code-block:: c++ + + auto AreSame = std::equal(std::views::reverse(Items1), + std::views::reverse(Items2)); + +Options +------- + +.. option:: IncludeStyle + + A string specifying which include-style is used, `llvm` or `google`. Default + is `llvm`. + diff --git a/clang-tools-extra/test/clang-tidy/checkers/boost/use-ranges.cpp b/clang-tools-extra/test/clang-tidy/checkers/boost/use-ranges.cpp new file mode 100644 index 00000000000000..3f3d6f1abec9f4 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/boost/use-ranges.cpp @@ -0,0 +1,194 @@ +// RUN: %check_clang_tidy -std=c++14 %s boost-use-ranges %t +// RUN: %check_clang_tidy -std=c++17 %s boost-use-ranges %t -check-suffixes=,CPP17 + +// CHECK-FIXES: #include +// CHECK-FIXES: #include +// CHECK-FIXES: #include +// CHECK-FIXES: #include +// CHECK-FIXES: #include +// CHECK-FIXES: #include +// CHECK-FIXES: #include +// CHECK-FIXES: #include +// CHECK-FIXES-CPP17: #include +// CHECK-FIXES: #include +// CHECK-FIXES: #include + +namespace std { + +template class vector { +public: + using iterator = T *; + using const_iterator = const T *; + constexpr const_iterator begin() const; + constexpr const_iterator end() const; + constexpr const_iterator cbegin() const; + constexpr const_iterator cend() const; + constexpr iterator begin(); + constexpr iterator end(); +}; + +template constexpr auto begin(const Container &Cont) { + return Cont.begin(); +} + +template constexpr auto begin(Container &Cont) { + return Cont.begin(); +} + +template constexpr auto end(const Container &Cont) { + return Cont.end(); +} + +template constexpr auto end(Container &Cont) { + return Cont.end(); +} + +template constexpr 
auto cbegin(const Container &Cont) { + return Cont.cbegin(); +} + +template constexpr auto cend(const Container &Cont) { + return Cont.cend(); +} +// Find +template< class InputIt, class T > +InputIt find(InputIt first, InputIt last, const T& value); + +template void reverse(Iter begin, Iter end); + +template +bool includes(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2); + +template +bool is_permutation(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, + ForwardIt2 last2); + +template +bool next_permutation(BidirIt first, BidirIt last); + +template +bool equal(ForwardIt1 first1, ForwardIt1 last1, + ForwardIt2 first2, ForwardIt2 last2); + +template +void push_heap(RandomIt first, RandomIt last); + +template +OutputIt copy_if(InputIt first, InputIt last, OutputIt d_first, UnaryPred pred); + +template +ForwardIt is_sorted_until(ForwardIt first, ForwardIt last); + +template +void reduce(InputIt first, InputIt last); + +template +T reduce(InputIt first, InputIt last, T init); + +template +T reduce(InputIt first, InputIt last, T init, BinaryOp op) { + // Need a definition to suppress undefined_internal_type when invoked with lambda + return init; +} + +template +T accumulate(InputIt first, InputIt last, T init); + +} // namespace std + +namespace boost { +namespace range_adl_barrier { +template void *begin(T &); +template void *end(T &); +template void *const_begin(const T &); +template void *const_end(const T &); +} // namespace range_adl_barrier +using namespace range_adl_barrier; + +template void *rbegin(T &); +template void *rend(T &); + +template void *const_rbegin(T &); +template void *const_rend(T &); +namespace algorithm { + +template +T reduce(InputIterator first, InputIterator last, T init, BinaryOperation bOp) { + return init; +} +} // namespace algorithm +} // namespace boost + +bool returnTrue(int val) { + return true; +} + +void stdLib() { + std::vector I, J; + std::find(I.begin(), I.end(), 0); + // CHECK-MESSAGES: :[[@LINE-1]]:3: 
warning: use a boost version of this algorithm + // CHECK-FIXES: boost::range::find(I, 0); + + std::reverse(I.cbegin(), I.cend()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a boost version of this algorithm + // CHECK-FIXES: boost::range::reverse(I); + + std::includes(I.begin(), I.end(), std::begin(J), std::end(J)); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a boost version of this algorithm + // CHECK-FIXES: boost::range::includes(I, J); + + std::equal(std::cbegin(I), std::cend(I), J.begin(), J.end()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a boost version of this algorithm + // CHECK-FIXES: boost::range::equal(I, J); + + std::next_permutation(I.begin(), I.end()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a boost version of this algorithm + // CHECK-FIXES: boost::range::next_permutation(I); + + std::push_heap(I.begin(), I.end()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a boost version of this algorithm + // CHECK-FIXES: boost::range::push_heap(I); + + std::copy_if(I.begin(), I.end(), J.begin(), &returnTrue); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a boost version of this algorithm + // CHECK-FIXES: boost::algorithm::copy_if(I, J.begin(), &returnTrue); + + std::is_sorted_until(I.begin(), I.end()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a boost version of this algorithm + // CHECK-FIXES: boost::algorithm::is_sorted_until(I); + + std::reduce(I.begin(), I.end()); + // CHECK-MESSAGES-CPP17: :[[@LINE-1]]:3: warning: use a boost version of this algorithm + // CHECK-FIXES-CPP17: boost::algorithm::reduce(I); + + std::reduce(I.begin(), I.end(), 2); + // CHECK-MESSAGES-CPP17: :[[@LINE-1]]:3: warning: use a boost version of this algorithm + // CHECK-FIXES-CPP17: boost::algorithm::reduce(I, 2); + + std::reduce(I.begin(), I.end(), 0, [](int a, int b){ return a + b; }); + // CHECK-MESSAGES-CPP17: :[[@LINE-1]]:3: warning: use a boost version of this algorithm + // CHECK-FIXES-CPP17: boost::algorithm::reduce(I, 0, 
[](int a, int b){ return a + b; }); + + std::equal(boost::rbegin(I), boost::rend(I), J.begin(), J.end()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a boost version of this algorithm + // CHECK-FIXES: boost::range::equal(boost::adaptors::reverse(I), J); + + std::accumulate(I.begin(), I.end(), 0); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a boost version of this algorithm + // CHECK-FIXES: boost::accumulate(I, 0); +} + +void boostLib() { + std::vector I; + boost::algorithm::reduce(I.begin(), I.end(), 0, [](int a, int b){ return a + b; }); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a ranged version of this algorithm + // CHECK-FIXES: boost::algorithm::reduce(I, 0, [](int a, int b){ return a + b; }); + + boost::algorithm::reduce(boost::begin(I), boost::end(I), 1, [](int a, int b){ return a + b; }); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a ranged version of this algorithm + // CHECK-FIXES: boost::algorithm::reduce(I, 1, [](int a, int b){ return a + b; }); + + boost::algorithm::reduce(boost::const_begin(I), boost::const_end(I), 2, [](int a, int b){ return a + b; }); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a ranged version of this algorithm + // CHECK-FIXES: boost::algorithm::reduce(I, 2, [](int a, int b){ return a + b; }); +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/use-after-move.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/use-after-move.cpp index 7d9f63479a1b4e..6a4e3990e36dc5 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/use-after-move.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/use-after-move.cpp @@ -1,3 +1,4 @@ +// RUN: %check_clang_tidy -std=c++11 -check-suffixes=,CXX11 %s bugprone-use-after-move %t -- -- -fno-delayed-template-parsing // RUN: %check_clang_tidy -std=c++17-or-later %s bugprone-use-after-move %t -- -- -fno-delayed-template-parsing typedef decltype(nullptr) nullptr_t; @@ -135,6 +136,7 @@ class A { A &operator=(A &&); void foo() const; 
+ void bar(int i) const; int getInt() const; operator bool() const; @@ -576,6 +578,19 @@ void useAndMoveInLoop() { std::move(a); } } + // Same as above, but the use and the move are in different CFG blocks. + { + A a; + for (int i = 0; i < 10; ++i) { + if (i < 10) + a.foo(); + // CHECK-NOTES: [[@LINE-1]]:9: warning: 'a' used after it was moved + // CHECK-NOTES: [[@LINE+3]]:9: note: move occurred here + // CHECK-NOTES: [[@LINE-3]]:9: note: the use happens in a later loop + if (i < 10) + std::move(a); + } + } // However, this case shouldn't be flagged -- the scope of the declaration of // 'a' is important. { @@ -1352,6 +1367,40 @@ void ifWhileAndSwitchSequenceInitDeclAndCondition() { } } +// In a function call, the expression that determines the callee is sequenced +// before the arguments -- but only in C++17 and later. +namespace CalleeSequencedBeforeArguments { +int consumeA(std::unique_ptr a); +int consumeA(A &&a); + +void calleeSequencedBeforeArguments() { + { + std::unique_ptr a; + a->bar(consumeA(std::move(a))); + // CHECK-NOTES-CXX11: [[@LINE-1]]:5: warning: 'a' used after it was moved + // CHECK-NOTES-CXX11: [[@LINE-2]]:21: note: move occurred here + // CHECK-NOTES-CXX11: [[@LINE-3]]:5: note: the use and move are unsequenced + } + { + std::unique_ptr a; + std::unique_ptr getArg(std::unique_ptr a); + getArg(std::move(a))->bar(a->getInt()); + // CHECK-NOTES: [[@LINE-1]]:31: warning: 'a' used after it was moved + // CHECK-NOTES: [[@LINE-2]]:12: note: move occurred here + // CHECK-NOTES-CXX11: [[@LINE-3]]:31: note: the use and move are unsequenced + } + { + A a; + // Nominally, the callee `a.bar` is evaluated before the argument + // `consumeA(std::move(a))`, but in effect `a` is only accessed after the + // call to `A::bar()` happens, i.e. after the argument has been evaluted. 
+ a.bar(consumeA(std::move(a))); + // CHECK-NOTES: [[@LINE-1]]:5: warning: 'a' used after it was moved + // CHECK-NOTES: [[@LINE-2]]:11: note: move occurred here + } +} +} // namespace CalleeSequencedBeforeArguments + // Some statements in templates (e.g. null, break and continue statements) may // be shared between the uninstantiated and instantiated versions of the // template and therefore have multiple parents. Make sure the sequencing code @@ -1469,7 +1518,6 @@ class CtorInitOrder { // CHECK-NOTES: [[@LINE-1]]:11: warning: 'val' used after it was moved s{std::move(val)} {} // wrong order // CHECK-NOTES: [[@LINE-1]]:9: note: move occurred here - // CHECK-NOTES: [[@LINE-4]]:11: note: the use happens in a later loop iteration than the move private: bool a; diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-ranges.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-ranges.cpp new file mode 100644 index 00000000000000..623af26e3cdc73 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-ranges.cpp @@ -0,0 +1,208 @@ +// RUN: %check_clang_tidy -std=c++20 %s modernize-use-ranges %t +// RUN: %check_clang_tidy -std=c++23 %s modernize-use-ranges %t -check-suffixes=,CPP23 + +// CHECK-FIXES: #include +// CHECK-FIXES-CPP23: #include +// CHECK-FIXES: #include + +namespace std { + +template class vector { +public: + using iterator = T *; + using const_iterator = const T *; + using reverse_iterator = T*; + using reverse_const_iterator = const T*; + + constexpr const_iterator begin() const; + constexpr const_iterator end() const; + constexpr const_iterator cbegin() const; + constexpr const_iterator cend() const; + constexpr iterator begin(); + constexpr iterator end(); + constexpr reverse_const_iterator rbegin() const; + constexpr reverse_const_iterator rend() const; + constexpr reverse_const_iterator crbegin() const; + constexpr reverse_const_iterator crend() const; + constexpr reverse_iterator rbegin(); + constexpr 
reverse_iterator rend(); +}; + +template constexpr auto begin(const Container &Cont) { + return Cont.begin(); +} + +template constexpr auto begin(Container &Cont) { + return Cont.begin(); +} + +template constexpr auto end(const Container &Cont) { + return Cont.end(); +} + +template constexpr auto end(Container &Cont) { + return Cont.end(); +} + +template constexpr auto cbegin(const Container &Cont) { + return Cont.cbegin(); +} + +template constexpr auto cend(const Container &Cont) { + return Cont.cend(); +} + +template constexpr auto rbegin(const Container &Cont) { + return Cont.rbegin(); +} + +template constexpr auto rbegin(Container &Cont) { + return Cont.rbegin(); +} + +template constexpr auto rend(const Container &Cont) { + return Cont.rend(); +} + +template constexpr auto rend(Container &Cont) { + return Cont.rend(); +} + +template constexpr auto crbegin(const Container &Cont) { + return Cont.crbegin(); +} + +template constexpr auto crend(const Container &Cont) { + return Cont.crend(); +} +// Find +template< class InputIt, class T > +InputIt find( InputIt first, InputIt last, const T& value ); + +// Reverse +template void reverse(Iter begin, Iter end); + +// Includes +template +bool includes(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2); + +// IsPermutation +template +bool is_permutation(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2); +template +bool is_permutation(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, + ForwardIt2 last2); + +// Equal +template +bool equal(InputIt1 first1, InputIt1 last1, InputIt2 first2); + +template +bool equal(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2); + +template +bool equal(InputIt1 first1, InputIt1 last1, + InputIt2 first2, InputIt2 last2, BinaryPred p) { + // Need a definition to suppress undefined_internal_type when invoked with lambda + return true; +} + +template +void iota(ForwardIt first, ForwardIt last, T value); + +} // namespace std + +void Positives() { + 
std::vector I, J; + std::find(I.begin(), I.end(), 0); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a ranges version of this algorithm + // CHECK-FIXES: std::ranges::find(I, 0); + + std::find(I.cbegin(), I.cend(), 1); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a ranges version of this algorithm + // CHECK-FIXES: std::ranges::find(I, 1); + + std::find(std::begin(I), std::end(I), 2); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a ranges version of this algorithm + // CHECK-FIXES: std::ranges::find(I, 2); + + std::find(std::cbegin(I), std::cend(I), 3); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a ranges version of this algorithm + // CHECK-FIXES: std::ranges::find(I, 3); + + std::find(std::cbegin(I), I.cend(), 4); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a ranges version of this algorithm + // CHECK-FIXES: std::ranges::find(I, 4); + + std::reverse(I.begin(), I.end()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a ranges version of this algorithm + // CHECK-FIXES: std::ranges::reverse(I); + + std::includes(I.begin(), I.end(), I.begin(), I.end()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a ranges version of this algorithm + // CHECK-FIXES: std::ranges::includes(I, I); + + std::includes(I.begin(), I.end(), J.begin(), J.end()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a ranges version of this algorithm + // CHECK-FIXES: std::ranges::includes(I, J); + + std::is_permutation(I.begin(), I.end(), J.begin(), J.end()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a ranges version of this algorithm + // CHECK-FIXES: std::ranges::is_permutation(I, J); + + std::equal(I.begin(), I.end(), J.begin(), J.end()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a ranges version of this algorithm + // CHECK-FIXES: std::ranges::equal(I, J); + + std::equal(I.begin(), I.end(), J.begin(), J.end(), [](int a, int b){ return a == b; }); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a ranges version of this algorithm + // 
CHECK-FIXES: std::ranges::equal(I, J, [](int a, int b){ return a == b; }); + + std::iota(I.begin(), I.end(), 0); + // CHECK-MESSAGES-CPP23: :[[@LINE-1]]:3: warning: use a ranges version of this algorithm + // CHECK-FIXES-CPP23: std::ranges::iota(I, 0); + + using std::find; + namespace my_std = std; + + // Potentially these could be updated to better qualify the replaced function name + find(I.begin(), I.end(), 5); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a ranges version of this algorithm + // CHECK-FIXES: std::ranges::find(I, 5); + + my_std::find(I.begin(), I.end(), 6); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a ranges version of this algorithm + // CHECK-FIXES: std::ranges::find(I, 6); +} + +void Reverse(){ + std::vector I, J; + std::find(I.rbegin(), I.rend(), 0); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a ranges version of this algorithm + // CHECK-FIXES: std::ranges::find(std::views::reverse(I), 0); + + std::equal(std::rbegin(I), std::rend(I), J.begin(), J.end()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a ranges version of this algorithm + // CHECK-FIXES: std::ranges::equal(std::views::reverse(I), J); + + std::equal(I.begin(), I.end(), std::crbegin(J), std::crend(J)); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use a ranges version of this algorithm + // CHECK-FIXES: std::ranges::equal(I, std::views::reverse(J)); +} + +void Negatives() { + std::vector I, J; + std::find(I.begin(), J.end(), 0); + std::find(I.begin(), I.begin(), 0); + std::find(I.end(), I.begin(), 0); + + + // Need both ranges for this one + std::is_permutation(I.begin(), I.end(), J.begin()); + + // We only have one valid match here and the ranges::equal function needs 2 complete ranges + std::equal(I.begin(), I.end(), J.begin()); + std::equal(I.begin(), I.end(), J.end(), J.end()); + std::equal(std::rbegin(I), std::rend(I), std::rend(J), std::rbegin(J)); + std::equal(I.begin(), J.end(), I.begin(), I.end()); +} diff --git a/clang/docs/ReleaseNotes.rst 
b/clang/docs/ReleaseNotes.rst index 75e2a87c319624..e00d18ab9a2908 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -973,6 +973,8 @@ Bug Fixes to C++ Support of the address of operator. (#GH97483). - Fixed an assertion failure about a constant expression which is a known integer but is not evaluated to an integer. (#GH96670). +- Fixed a bug where references to lambda capture inside a ``noexcept`` specifier were not correctly + instantiated. (#GH95735). - Fix a crash when checking destructor reference with an invalid initializer. (#GH97230). Bug Fixes to AST Handling diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def index 5fb199b1b2b032..36bd2b69dbbcb9 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -292,6 +292,8 @@ TARGET_HEADER_BUILTIN(_CountOneBits64, "UiULLi", "nh", INTRIN_H, ALL_MS_LANGUAGE TARGET_HEADER_BUILTIN(__prefetch, "vvC*", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__hlt, "UiUi.", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") + #undef BUILTIN #undef LANGBUILTIN #undef TARGET_BUILTIN diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td index de758cbe679dcf..33b1d58bb5b099 100644 --- a/clang/include/clang/Basic/DiagnosticCommonKinds.td +++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td @@ -367,6 +367,15 @@ def warn_target_unrecognized_env : Warning< def err_target_unsupported_abi_with_fpu : Error< "'%0' ABI is not supported with FPU">; +def err_ppc_impossible_musttail: Error< + "'musttail' attribute for this call is impossible because %select{" + "long calls can not be tail called on PPC|" + "indirect calls can not be tail called on PPC|" + "external calls can not be tail called on PPC}0" + >; +def err_aix_musttail_unsupported: Error< + "'musttail' attribute is not supported on AIX">; + // Source manager def 
err_cannot_open_file : Error<"cannot open file '%0': %1">, DefaultFatal; def err_file_modified : Error< diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 44fd51ec9abc96..1aba8bc24ba2fa 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7696,7 +7696,6 @@ def err_qualified_objc_access : Error< def ext_freestanding_complex : Extension< "complex numbers are an extension in a freestanding C99 implementation">; -// FIXME: Remove when we support imaginary. def err_imaginary_not_supported : Error<"imaginary types are not supported">; // Obj-c expressions diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 37d570ca5e75b5..e6054425909098 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -331,6 +331,11 @@ KEYWORD(_Atomic , KEYALL|KEYNOOPENCL) KEYWORD(_Bool , KEYNOCXX) KEYWORD(_Complex , KEYALL) KEYWORD(_Generic , KEYALL) +// Note, C2y removed support for _Imaginary; we retain it as a keyword because +// 1) it's a reserved identifier, so we're allowed to steal it, 2) there's no +// good way to specify a keyword in earlier but not later language modes within +// this file, 3) this allows us to provide a better diagnostic in case a user +// does use the keyword. 
KEYWORD(_Imaginary , KEYALL) KEYWORD(_Noreturn , KEYALL) KEYWORD(_Static_assert , KEYALL) diff --git a/clang/include/clang/Sema/DeclSpec.h b/clang/include/clang/Sema/DeclSpec.h index 23bc780e04979d..425b6e2a0b30c9 100644 --- a/clang/include/clang/Sema/DeclSpec.h +++ b/clang/include/clang/Sema/DeclSpec.h @@ -269,7 +269,7 @@ class DeclSpec { enum TSC { TSC_unspecified, - TSC_imaginary, + TSC_imaginary, // Unsupported TSC_complex }; @@ -875,7 +875,7 @@ class DeclSpec { } /// Finish - This does final analysis of the declspec, issuing diagnostics for - /// things like "_Imaginary" (lacking an FP type). After calling this method, + /// things like "_Complex" (lacking an FP type). After calling this method, /// DeclSpec is guaranteed self-consistent, even if an error occurred. void Finish(Sema &S, const PrintingPolicy &Policy); diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 16ed6d88d1cb1e..490c4a2fc525cd 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -612,19 +612,26 @@ LinkageComputer::getLVForNamespaceScopeDecl(const NamedDecl *D, assert(D->getDeclContext()->getRedeclContext()->isFileContext() && "Not a name having namespace scope"); ASTContext &Context = D->getASTContext(); + const auto *Var = dyn_cast(D); // C++ [basic.link]p3: // A name having namespace scope (3.3.6) has internal linkage if it // is the name of - if (getStorageClass(D->getCanonicalDecl()) == SC_Static) { + if ((getStorageClass(D->getCanonicalDecl()) == SC_Static) || + (Context.getLangOpts().C23 && Var && Var->isConstexpr())) { // - a variable, variable template, function, or function template // that is explicitly declared static; or // (This bullet corresponds to C99 6.2.2p3.) + + // C23 6.2.2p3 + // If the declaration of a file scope identifier for + // an object contains any of the storage-class specifiers static or + // constexpr then the identifier has internal linkage. 
return LinkageInfo::internal(); } - if (const auto *Var = dyn_cast(D)) { + if (Var) { // - a non-template variable of non-volatile const-qualified type, unless // - it is explicitly declared extern, or // - it is declared in the purview of a module interface unit diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 89c5566f7ad091..4ba4a49311d36b 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -93,6 +93,8 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector &Features, HasQuadwordAtomics = true; } else if (Feature == "+aix-shared-lib-tls-model-opt") { HasAIXShLibTLSModelOpt = true; + } else if (Feature == "+longcall") { + UseLongCalls = true; } // TODO: Finish this list and add an assert that we've handled them // all. @@ -728,6 +730,7 @@ bool PPCTargetInfo::hasFeature(StringRef Feature) const { .Case("isa-v31-instructions", IsISA3_1) .Case("quadword-atomics", HasQuadwordAtomics) .Case("aix-shared-lib-tls-model-opt", HasAIXShLibTLSModelOpt) + .Case("longcall", UseLongCalls) .Default(false); } diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index e4d6a02386da58..b15ab6fbcf492f 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -82,6 +82,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { bool IsISA3_1 = false; bool HasQuadwordAtomics = false; bool HasAIXShLibTLSModelOpt = false; + bool UseLongCalls = false; protected: std::string ABI; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 276d492955207a..1f6fc842ddd955 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -457,6 +457,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector &Features, HasCF = true; } else if (Feature == "+zu") { HasZU = true; + } else if (Feature == "+branch-hint") { + HasBranchHint = true; } X86SSEEnum Level = llvm::StringSwitch(Feature) @@ -1292,6 +1294,7 @@ bool 
X86TargetInfo::hasFeature(StringRef Feature) const { .Case("nf", HasNF) .Case("cf", HasCF) .Case("zu", HasZU) + .Case("branch-hint", HasBranchHint) .Default(false); } diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 5ce4953251bc34..a70711f4ae2bb2 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -174,6 +174,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasCF = false; bool HasZU = false; bool HasInlineAsmUseGPR32 = false; + bool HasBranchHint = false; protected: llvm::X86::CPUKind CPU = llvm::X86::CK_None; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 268137b319f76f..6cc0d9485720c0 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -11506,6 +11506,15 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Address, RW, Locality, Data}); } + if (BuiltinID == AArch64::BI__hlt) { + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt); + Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))}); + + // Return 0 for convenience, even though MSVC returns some other undefined + // value. + return ConstantInt::get(Builder.getInt32Ty(), 0); + } + // Handle MSVC intrinsics before argument evaluation to prevent double // evaluation. 
if (std::optional MsvcIntId = diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 2b301130ef7b70..7e7b2b395f7d63 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5751,8 +5751,35 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (llvm::CallInst *Call = dyn_cast(CI)) { if (TargetDecl && TargetDecl->hasAttr()) Call->setTailCallKind(llvm::CallInst::TCK_NoTail); - else if (IsMustTail) + else if (IsMustTail) { + if (getTarget().getTriple().isPPC()) { + if (getTarget().getTriple().isOSAIX()) + CGM.getDiags().Report(Loc, diag::err_aix_musttail_unsupported); + else if (!getTarget().hasFeature("pcrelative-memops")) { + if (getTarget().hasFeature("longcall")) + CGM.getDiags().Report(Loc, diag::err_ppc_impossible_musttail) << 0; + else if (Call->isIndirectCall()) + CGM.getDiags().Report(Loc, diag::err_ppc_impossible_musttail) << 1; + else if (isa_and_nonnull(TargetDecl)) { + if (!cast(TargetDecl)->isDefined()) + // The undefined callee may be a forward declaration. Without + // knowning all symbols in the module, we won't know the symbol is + // defined or not. Collect all these symbols for later diagnosing. 
+ CGM.addUndefinedGlobalForTailCall( + {cast(TargetDecl), Loc}); + else { + llvm::GlobalValue::LinkageTypes Linkage = CGM.getFunctionLinkage( + GlobalDecl(cast(TargetDecl))); + if (llvm::GlobalValue::isWeakForLinker(Linkage) || + llvm::GlobalValue::isDiscardableIfUnused(Linkage)) + CGM.getDiags().Report(Loc, diag::err_ppc_impossible_musttail) + << 2; + } + } + } + } Call->setTailCallKind(llvm::CallInst::TCK_MustTail); + } } // Add metadata for calls to MSAllocator functions diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 84ad3b566b6472..000d4ff5c06986 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -817,8 +817,6 @@ ComplexPairTy ComplexExprEmitter::EmitBinMul(const BinOpInfo &Op) { // // But we can fold away components which would be zero due to a real // operand according to C11 Annex G.5.1p2. - // FIXME: C11 also provides for imaginary types which would allow folding - // still more of this within the type system. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Op.FPFeatures); if (Op.LHS.second && Op.RHS.second) { @@ -1049,9 +1047,6 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) { // delegate to a libcall to handle all of the complexities and minimize // underflow/overflow cases. When FastMath is allowed we construct the // divide inline using the same algorithm as for integer operands. - // - // FIXME: We would be able to avoid the libcall in many places if we - // supported imaginary types in addition to complex types. BinOpInfo LibCallOp = Op; // If LHS was a real, supply a null imaginary part. 
if (!LHSi) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 76ff8f5b234da6..4d05322951d0a5 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -6555,7 +6555,7 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, } void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { - llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic; + llvm::AtomicOrdering AO = CGM.getOpenMPRuntime().getDefaultMemoryOrdering(); // Fail Memory Clause Ordering. llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic; bool MemOrderingSpecified = false; diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index dc9dd034dee7ba..44bc7fbfdd37e8 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1394,6 +1394,21 @@ void CodeGenModule::Release() { // that might affect the DLL storage class or the visibility, and // before anything that might act on these. setVisibilityFromDLLStorageClass(LangOpts, getModule()); + + // Check the tail call symbols are truly undefined. 
+ if (getTriple().isPPC() && !MustTailCallUndefinedGlobals.empty()) { + for (auto &I : MustTailCallUndefinedGlobals) { + if (!I.first->isDefined()) + getDiags().Report(I.second, diag::err_ppc_impossible_musttail) << 2; + else { + StringRef MangledName = getMangledName(GlobalDecl(I.first)); + llvm::GlobalValue *Entry = GetGlobalValue(MangledName); + if (!Entry || Entry->isWeakForLinker() || + Entry->isDeclarationForLinker()) + getDiags().Report(I.second, diag::err_ppc_impossible_musttail) << 2; + } + } + } } void CodeGenModule::EmitOpenCLMetadata() { diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 0444f9f8449f86..8b65348b879b65 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -485,6 +485,14 @@ class CodeGenModule : public CodeGenTypeCache { typedef std::pair GlobalInitData; + // When a tail call is performed on an "undefined" symbol, on PPC without pc + // relative feature, the tail call is not allowed. In "EmitCall" for such + // tail calls, the "undefined" symbols may be forward declarations, their + // definitions are provided in the module after the callsites. For such tail + // calls, diagnose message should not be emitted. 
+ llvm::SmallSetVector, 4> + MustTailCallUndefinedGlobals; + struct GlobalInitPriorityCmp { bool operator()(const GlobalInitData &LHS, const GlobalInitData &RHS) const { @@ -1647,6 +1655,11 @@ class CodeGenModule : public CodeGenTypeCache { return getTriple().isSPIRVLogical(); } + void addUndefinedGlobalForTailCall( + std::pair Global) { + MustTailCallUndefinedGlobals.insert(Global); + } + private: bool shouldDropDLLAttribute(const Decl *D, const llvm::GlobalValue *GV) const; diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp index 3146caba1c6158..1dc3172a6bdf9b 100644 --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -469,7 +469,8 @@ bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const { ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState &State) const { // If the return value is indirect, then the hidden argument is consuming one // integer register. - if (State.FreeRegs) { + if (State.CC != llvm::CallingConv::X86_FastCall && + State.CC != llvm::CallingConv::X86_VectorCall && State.FreeRegs) { --State.FreeRegs; if (!IsMCUABI) return getNaturalAlignIndirectInReg(RetTy); diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index c56a0c2c46c477..ab1590104b7903 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -78,19 +78,8 @@ static bool useFramePointerForTargetByDefault(const llvm::opt::ArgList &Args, !Args.hasArg(clang::driver::options::OPT_mfentry)) return true; - if (Triple.isAndroid()) { - switch (Triple.getArch()) { - case llvm::Triple::aarch64: - case llvm::Triple::arm: - case llvm::Triple::armeb: - case llvm::Triple::thumb: - case llvm::Triple::thumbeb: - case llvm::Triple::riscv64: - return true; - default: - break; - } - } + if (Triple.isAndroid()) + return true; switch (Triple.getArch()) { case llvm::Triple::xcore: @@ -166,7 +155,7 @@ static bool 
useFramePointerForTargetByDefault(const llvm::opt::ArgList &Args, static bool useLeafFramePointerForTargetByDefault(const llvm::Triple &Triple) { if (Triple.isAArch64() || Triple.isPS() || Triple.isVE() || - (Triple.isAndroid() && Triple.isRISCV64())) + (Triple.isAndroid() && !Triple.isARM())) return false; return true; diff --git a/clang/lib/Headers/float.h b/clang/lib/Headers/float.h index a565a33243df19..49d4212414d641 100644 --- a/clang/lib/Headers/float.h +++ b/clang/lib/Headers/float.h @@ -164,8 +164,8 @@ #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \ !defined(__STRICT_ANSI__) /* C23 5.2.5.3.3p29-30 */ -# define INFINITY (__builtin_inf()) -# define NAN (__builtin_nan("")) +# define INFINITY (__builtin_inff()) +# define NAN (__builtin_nanf("")) #endif #ifdef __STDC_WANT_IEC_60559_TYPES_EXT__ diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h index d2250926ce5e10..6308c865ca9136 100644 --- a/clang/lib/Headers/intrin.h +++ b/clang/lib/Headers/intrin.h @@ -408,7 +408,10 @@ unsigned int _CountLeadingSigns64(__int64); unsigned int _CountOneBits(unsigned long); unsigned int _CountOneBits64(unsigned __int64); +unsigned int __hlt(unsigned int, ...); + void __cdecl __prefetch(const void *); + #endif /*----------------------------------------------------------------------------*\ diff --git a/clang/lib/Index/USRGeneration.cpp b/clang/lib/Index/USRGeneration.cpp index 5036ddee35fd12..ad7870309c5df1 100644 --- a/clang/lib/Index/USRGeneration.cpp +++ b/clang/lib/Index/USRGeneration.cpp @@ -257,12 +257,20 @@ void USRGenerator::VisitFunctionDecl(const FunctionDecl *D) { !D->hasAttr()) return; - if (const TemplateArgumentList * - SpecArgs = D->getTemplateSpecializationArgs()) { + if (D->isFunctionTemplateSpecialization()) { Out << '<'; - for (unsigned I = 0, N = SpecArgs->size(); I != N; ++I) { - Out << '#'; - VisitTemplateArgument(SpecArgs->get(I)); + if (const TemplateArgumentList *SpecArgs = + D->getTemplateSpecializationArgs()) { + 
for (const auto &Arg : SpecArgs->asArray()) { + Out << '#'; + VisitTemplateArgument(Arg); + } + } else if (const ASTTemplateArgumentListInfo *SpecArgsWritten = + D->getTemplateSpecializationArgsAsWritten()) { + for (const auto &ArgLoc : SpecArgsWritten->arguments()) { + Out << '#'; + VisitTemplateArgument(ArgLoc.getArgument()); + } } Out << '>'; } diff --git a/clang/lib/Sema/DeclSpec.cpp b/clang/lib/Sema/DeclSpec.cpp index 96c90a60b96825..9a4d52d4b6b716 100644 --- a/clang/lib/Sema/DeclSpec.cpp +++ b/clang/lib/Sema/DeclSpec.cpp @@ -1146,7 +1146,7 @@ void DeclSpec::SaveWrittenBuiltinSpecs() { } /// Finish - This does final analysis of the declspec, rejecting things like -/// "_Imaginary" (lacking an FP type). After calling this method, DeclSpec is +/// "_Complex" (lacking an FP type). After calling this method, DeclSpec is /// guaranteed to be self-consistent, even if an error occurred. void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) { // Before possibly changing their values, save specs as written. @@ -1331,8 +1331,8 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) { break; } - // TODO: if the implementation does not implement _Complex or _Imaginary, - // disallow their use. Need information about the backend. + // TODO: if the implementation does not implement _Complex, disallow their + // use. Need information about the backend. 
if (TypeSpecComplex != TSC_unspecified) { if (TypeSpecType == TST_unspecified) { S.Diag(TSCLoc, diag::ext_plain_complex) diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp index 281d5341520547..370db341e997ec 100644 --- a/clang/lib/Sema/SemaARM.cpp +++ b/clang/lib/Sema/SemaARM.cpp @@ -1112,6 +1112,9 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI, if (BuiltinID == AArch64::BI__break) return SemaRef.BuiltinConstantArgRange(TheCall, 0, 0, 0xffff); + if (BuiltinID == AArch64::BI__hlt) + return SemaRef.BuiltinConstantArgRange(TheCall, 0, 0, 0xffff); + if (CheckNeonBuiltinFunctionCall(TI, BuiltinID, TheCall)) return true; diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index 7ea760ce579386..8fea7b0cf0d47c 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -1805,7 +1805,8 @@ static void AddTypeSpecifierResults(const LangOptions &LangOpts, if (LangOpts.C99) { // C99-specific Results.AddResult(Result("_Complex", CCP_Type)); - Results.AddResult(Result("_Imaginary", CCP_Type)); + if (!LangOpts.C2y) + Results.AddResult(Result("_Imaginary", CCP_Type)); Results.AddResult(Result("_Bool", CCP_Type)); Results.AddResult(Result("restrict", CCP_Type)); } diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index a4acf3b4fbf417..7851c5d080cf38 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -4983,7 +4983,7 @@ static void AddKeywordsToConsumer(Sema &SemaRef, static const char *const CTypeSpecs[] = { "char", "const", "double", "enum", "float", "int", "long", "short", "signed", "struct", "union", "unsigned", "void", "volatile", - "_Complex", "_Imaginary", + "_Complex", // storage-specifiers as well "extern", "inline", "static", "typedef" }; @@ -4991,6 +4991,9 @@ static void AddKeywordsToConsumer(Sema &SemaRef, for (const auto *CTS : CTypeSpecs) Consumer.addKeywordResult(CTS); + if (SemaRef.getLangOpts().C99 && 
!SemaRef.getLangOpts().C2y) + Consumer.addKeywordResult("_Imaginary"); + if (SemaRef.getLangOpts().C99) Consumer.addKeywordResult("restrict"); if (SemaRef.getLangOpts().Bool || SemaRef.getLangOpts().CPlusPlus) diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 64f6b01bed2292..2e90f0c215b8d9 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -4704,6 +4704,12 @@ void Sema::InstantiateExceptionSpec(SourceLocation PointOfInstantiation, return; } + // The noexcept specification could reference any lambda captures. Ensure + // those are added to the LocalInstantiationScope. + LambdaScopeForCallOperatorInstantiationRAII PushLambdaCaptures( + *this, Decl, TemplateArgs, Scope, + /*ShouldAddDeclsFromParentScope=*/false); + SubstExceptionSpec(Decl, Template->getType()->castAs(), TemplateArgs); } diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 066003c47eb43f..f3510a00919940 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -1423,7 +1423,9 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) { Result = Context.getVectorType(Result, 128/typeSize, VecKind); } - // FIXME: Imaginary. + // _Imaginary was a feature of C99 through C23 but was never supported in + // Clang. The feature was removed in C2y, but we retain the unsupported + // diagnostic for an improved user experience. 
if (DS.getTypeSpecComplex() == DeclSpec::TSC_imaginary) S.Diag(DS.getTypeSpecComplexLoc(), diag::err_imaginary_not_supported); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 9c55960b14cba9..c88fb002cb8ec9 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1969,9 +1969,15 @@ namespace { llvm::PointerIntPair; struct data_type { - const HeaderFileInfo &HFI; + data_type(const HeaderFileInfo &HFI, bool AlreadyIncluded, + ArrayRef KnownHeaders, + UnresolvedModule Unresolved) + : HFI(HFI), AlreadyIncluded(AlreadyIncluded), + KnownHeaders(KnownHeaders), Unresolved(Unresolved) {} + + HeaderFileInfo HFI; bool AlreadyIncluded; - ArrayRef KnownHeaders; + SmallVector KnownHeaders; UnresolvedModule Unresolved; }; using data_type_ref = const data_type &; diff --git a/clang/test/C/C2y/n3254.c b/clang/test/C/C2y/n3254.c new file mode 100644 index 00000000000000..e08659cf377aa6 --- /dev/null +++ b/clang/test/C/C2y/n3254.c @@ -0,0 +1,149 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple=x86_64 -std=c2y %s -emit-llvm -o - | FileCheck %s + +/* WG14 N3254: Yes + * Accessing byte arrays + * + * NB: this basically boils down to how LLVM handles TBAA, so the best we can + * do for a Clang test is to test that the LLVM IR we pass is reasonable and we + * presume that LLVM has the test coverage to ensure that this behavior isn't + * regressed. 
+ */ + +struct S { + int x; + char c; + float f; +}; + +#define DECL_BUFFER(Ty, Name) alignas(Ty) unsigned char Name[sizeof(Ty)] + +// CHECK-LABEL: define dso_local i32 @foo( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[BUFFER:%.*]] = alloca [12 x i8], align 1 +// CHECK-NEXT: [[S_PTR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[BUFFER]], i8 0, i64 12, i1 false) +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x i8], ptr [[BUFFER]], i64 0, i64 0 +// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[S_PTR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_PTR]], align 8 +// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4 +// CHECK-NEXT: ret i32 [[TMP1]] +// +int foo() { + DECL_BUFFER(struct S, buffer) = {}; + struct S *s_ptr = (struct S *)buffer; + return s_ptr->x; +} + +// CHECK-LABEL: define dso_local signext i8 @bar( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[BUFFER:%.*]] = alloca [12 x i8], align 1 +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x i8], ptr [[BUFFER]], i64 0, i64 0 +// CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[ARRAYDECAY]], i32 0, i32 1 +// CHECK-NEXT: store i8 97, ptr [[C]], align 1 +// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [12 x i8], ptr [[BUFFER]], i64 0, i64 0 +// CHECK-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDECAY1]], i32 0, i32 1 +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[C2]], align 1 +// CHECK-NEXT: ret i8 [[TMP0]] +// +char bar() { + DECL_BUFFER(struct S, buffer); + ((struct S *)buffer)->c = 'a'; + return ((struct S *)buffer)->c; +} + +// CHECK-LABEL: define dso_local float @baz( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[BUFFER:%.*]] = alloca [12 x i8], align 1 +// 
CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x i8], ptr [[BUFFER]], i64 0, i64 0 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[ARRAYDECAY]], i32 0, i32 2 +// CHECK-NEXT: store float 3.000000e+00, ptr [[F]], align 1 +// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [12 x i8], ptr [[BUFFER]], i64 0, i64 0 +// CHECK-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDECAY1]], i32 0, i32 2 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F2]], align 1 +// CHECK-NEXT: ret float [[TMP0]] +// +float baz() { + DECL_BUFFER(struct S, buffer); + ((struct S *)buffer)->f = 3.0f; + return ((const struct S *)buffer)->f; +} + +struct T { + DECL_BUFFER(struct S, buffer); +}; + +// CHECK-LABEL: define dso_local signext i8 @quux( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[T:%.*]] = alloca [[STRUCT_T:%.*]], align 1 +// CHECK-NEXT: [[S_PTR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[T]], i8 0, i64 12, i1 false) +// CHECK-NEXT: [[BUFFER:%.*]] = getelementptr inbounds [[STRUCT_T]], ptr [[T]], i32 0, i32 0 +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x i8], ptr [[BUFFER]], i64 0, i64 0 +// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[S_PTR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_PTR]], align 8 +// CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[C]], align 4 +// CHECK-NEXT: ret i8 [[TMP1]] +// +char quux() { + struct T t = {}; + struct S *s_ptr = (struct S *)t.buffer; + return s_ptr->c; +} + +// CHECK-LABEL: define dso_local float @quibble( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[BUFFER:%.*]] = alloca [12 x i8], align 1 +// CHECK-NEXT: [[T_PTR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[S_PTR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 
1 [[BUFFER]], i8 0, i64 12, i1 false) +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x i8], ptr [[BUFFER]], i64 0, i64 0 +// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[T_PTR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_PTR]], align 8 +// CHECK-NEXT: [[BUFFER1:%.*]] = getelementptr inbounds [[STRUCT_T:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [12 x i8], ptr [[BUFFER1]], i64 0, i64 0 +// CHECK-NEXT: store ptr [[ARRAYDECAY2]], ptr [[S_PTR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_PTR]], align 8 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP1]], i32 0, i32 2 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F]], align 4 +// CHECK-NEXT: ret float [[TMP2]] +// +float quibble() { + DECL_BUFFER(struct T, buffer) = {}; + const struct T *t_ptr = (struct T *)buffer; + const struct S *s_ptr = (struct S *)t_ptr->buffer; + return s_ptr->f; +} + +// CHECK-LABEL: define dso_local i32 @quorble( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[BUFFER:%.*]] = alloca [12 x i8], align 1 +// CHECK-NEXT: [[S_PTR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [12 x i8], ptr [[BUFFER]], i64 0, i64 0 +// CHECK-NEXT: [[BUFFER1:%.*]] = getelementptr inbounds [[STRUCT_T:%.*]], ptr [[ARRAYDECAY]], i32 0, i32 0 +// CHECK-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [12 x i8], ptr [[BUFFER1]], i64 0, i64 0 +// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[ARRAYDECAY2]], i32 0, i32 0 +// CHECK-NEXT: store i32 12, ptr [[X]], align 1 +// CHECK-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [12 x i8], ptr [[BUFFER]], i64 0, i64 0 +// CHECK-NEXT: [[BUFFER4:%.*]] = getelementptr inbounds [[STRUCT_T]], ptr [[ARRAYDECAY3]], i32 0, i32 0 +// CHECK-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [12 x i8], ptr [[BUFFER4]], i64 0, i64 0 +// CHECK-NEXT: store ptr 
[[ARRAYDECAY5]], ptr [[S_PTR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_PTR]], align 8 +// CHECK-NEXT: [[X6:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X6]], align 4 +// CHECK-NEXT: ret i32 [[TMP1]] +// +int quorble() { + DECL_BUFFER(struct T, buffer); + ((struct S *)((struct T *)buffer)->buffer)->x = 12; + const struct S *s_ptr = (struct S *)((struct T *)buffer)->buffer; + return s_ptr->x; +} diff --git a/clang/test/C/C2y/n3274.c b/clang/test/C/C2y/n3274.c new file mode 100644 index 00000000000000..ccdb89f4069ded --- /dev/null +++ b/clang/test/C/C2y/n3274.c @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -verify -std=c23 -Wall -pedantic %s +// RUN: %clang_cc1 -verify -std=c2y -Wall -pedantic %s + +/* WG14 N3274: Yes + * Remove imaginary types + */ + +// Clang has never supported _Imaginary. +#ifdef __STDC_IEC_559_COMPLEX__ +#error "When did this happen?" +#endif + +_Imaginary float i; // expected-error {{imaginary types are not supported}} + +// _Imaginary is a keyword in older language modes, but doesn't need to be one +// in C2y or later. However, to improve diagnostic behavior, we retain it as a +// keyword in all language modes -- it is not available as an identifier. 
+static_assert(!__is_identifier(_Imaginary)); diff --git a/clang/test/CodeGen/PowerPC/musttail-forward-declaration-inline.c b/clang/test/CodeGen/PowerPC/musttail-forward-declaration-inline.c new file mode 100644 index 00000000000000..3d8ff3985cb0f5 --- /dev/null +++ b/clang/test/CodeGen/PowerPC/musttail-forward-declaration-inline.c @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 %s -triple powerpc64le-unknown-linux-gnu -o /dev/null -emit-llvm -verify +// RUN: %clang_cc1 %s -triple powerpc64-unknown-linux-gnu -o /dev/null -emit-llvm -verify + +inline int func2(int i); +int external_call2(int i) { + // expected-error@+1 {{'musttail' attribute for this call is impossible because external calls can not be tail called on PPC}} + [[clang::musttail]] return func2(i); +} + +inline int func2(int i) { + return 0; +} diff --git a/clang/test/CodeGen/PowerPC/musttail-forward-declaration-weak.c b/clang/test/CodeGen/PowerPC/musttail-forward-declaration-weak.c new file mode 100644 index 00000000000000..4314bbdd30619e --- /dev/null +++ b/clang/test/CodeGen/PowerPC/musttail-forward-declaration-weak.c @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 %s -triple powerpc64le-unknown-linux-gnu -o /dev/null -emit-llvm -verify +// RUN: %clang_cc1 %s -triple powerpc64-unknown-linux-gnu -o /dev/null -emit-llvm -verify + +int func2(int i); +int external_call2(int i) { + // expected-error@+1 {{'musttail' attribute for this call is impossible because external calls can not be tail called on PPC}} + [[clang::musttail]] return func2(i); +} + +__attribute__((weak)) int func2(int i) { + return 0; +} diff --git a/clang/test/CodeGen/PowerPC/musttail-forward-declaration.c b/clang/test/CodeGen/PowerPC/musttail-forward-declaration.c new file mode 100644 index 00000000000000..061a7a8c2da9d4 --- /dev/null +++ b/clang/test/CodeGen/PowerPC/musttail-forward-declaration.c @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 %s -triple powerpc64le-unknown-linux-gnu -o /dev/null -emit-llvm -verify=good +// RUN: %clang_cc1 %s -triple 
powerpc64-unknown-linux-gnu -o /dev/null -emit-llvm -verify=good + +int func2(int i); +int external_call2(int i) { + // good-no-diagnostics + [[clang::musttail]] return func2(i); +} +int func2(int i) { + return 0; +} diff --git a/clang/test/CodeGen/PowerPC/musttail-indirect.cpp b/clang/test/CodeGen/PowerPC/musttail-indirect.cpp new file mode 100644 index 00000000000000..3f495002606d47 --- /dev/null +++ b/clang/test/CodeGen/PowerPC/musttail-indirect.cpp @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 %s -triple powerpc64-unknown-linux-gnu -o /dev/null -emit-llvm -verify +// RUN: %clang_cc1 %s -triple powerpc-unknown-linux-gnu -o /dev/null -emit-llvm -verify + +void name(int *params) { + auto fn = (void (*)(int *))1; + // expected-error@+1 {{'musttail' attribute for this call is impossible because indirect calls can not be tail called on PPC}} + [[clang::musttail]] return fn(params); +} diff --git a/clang/test/CodeGen/PowerPC/musttail-inline.c b/clang/test/CodeGen/PowerPC/musttail-inline.c new file mode 100644 index 00000000000000..05aac886971274 --- /dev/null +++ b/clang/test/CodeGen/PowerPC/musttail-inline.c @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 %s -triple powerpc64-unknown-linux-gnu -o /dev/null -emit-llvm -verify +// RUN: %clang_cc1 %s -triple powerpc64le-unknown-linux-gnu -o /dev/null -emit-llvm -verify + +inline int foo(int x) { + return x; +} + +int bar(int x) +{ + // expected-error@+1 {{'musttail' attribute for this call is impossible because external calls can not be tail called on PPC}} + [[clang::musttail]] return foo(1); +} diff --git a/clang/test/CodeGen/PowerPC/musttail-undefined.c b/clang/test/CodeGen/PowerPC/musttail-undefined.c new file mode 100644 index 00000000000000..f2259adb018482 --- /dev/null +++ b/clang/test/CodeGen/PowerPC/musttail-undefined.c @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 %s -triple powerpc64-unknown-linux-gnu -o /dev/null -emit-llvm -verify +// RUN: %clang_cc1 %s -triple powerpc64le-unknown-linux-gnu -o /dev/null -emit-llvm -verify + +int foo(int 
x); + +int bar(int x) +{ + // expected-error@+1 {{'musttail' attribute for this call is impossible because external calls can not be tail called on PPC}} + [[clang::musttail]] return foo(x); +} diff --git a/clang/test/CodeGen/PowerPC/musttail-weak.c b/clang/test/CodeGen/PowerPC/musttail-weak.c new file mode 100644 index 00000000000000..dccc7a4d8cdd2c --- /dev/null +++ b/clang/test/CodeGen/PowerPC/musttail-weak.c @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 %s -triple powerpc64-ibm-aix-xcoff -o /dev/null -emit-llvm -verify=aix +// RUN: %clang_cc1 %s -triple powerpc-ibm-aix-xcoff -o /dev/null -emit-llvm -verify=aix +// RUN: %clang_cc1 %s -triple powerpc64-unknown-linux-gnu -o /dev/null -emit-llvm -verify=linux +// RUN: %clang_cc1 %s -triple powerpc64le-unknown-linux-gnu -o /dev/null -emit-llvm -verify=linux + +__attribute__((weak)) int func2(int i) { + return 0; +} +int external_call2(int i) { + // linux-error@+2 {{'musttail' attribute for this call is impossible because external calls can not be tail called on PPC}} + // aix-error@+1 {{'musttail' attribute is not supported on AIX}} + [[clang::musttail]] return func2(i); +} diff --git a/clang/test/CodeGen/PowerPC/musttail.c b/clang/test/CodeGen/PowerPC/musttail.c new file mode 100644 index 00000000000000..e3129263d24601 --- /dev/null +++ b/clang/test/CodeGen/PowerPC/musttail.c @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 %s -triple powerpc64-ibm-aix-xcoff -o /dev/null -emit-llvm -verify=aix +// RUN: %clang_cc1 %s -triple powerpc-ibm-aix-xcoff -o /dev/null -emit-llvm -verify=aix +// RUN: %clang_cc1 %s -triple powerpc64-unknown-linux-gnu -o /dev/null -emit-llvm -verify=good +// RUN: %clang_cc1 %s -triple powerpc-unknown-linux-gnu -o /dev/null -emit-llvm -verify=good +// RUN: %clang_cc1 %s -triple powerpc64le-unknown-linux-gnu -o /dev/null -emit-llvm -verify=good +// RUN: %clang_cc1 %s -triple powerpc64le-unknown-linux-gnu -target-feature +pcrelative-memops -o /dev/null -emit-llvm -verify=good +// RUN: %clang_cc1 %s -triple 
powerpc64le-unknown-linux-gnu -target-feature +longcall -o /dev/null -emit-llvm -verify=longcall +// RUN: %clang_cc1 %s -triple powerpc64le-unknown-linux-gnu -target-feature +pcrelative-memops -target-feature +longcall -o /dev/null -emit-llvm -verify=good + +int foo(int x) { + return x; +} + +int bar(int x) +{ + // good-no-diagnostics + // longcall-error@+2 {{'musttail' attribute for this call is impossible because long calls can not be tail called on PPC}} + // aix-error@+1 {{'musttail' attribute is not supported on AIX}} + [[clang::musttail]] return foo(1); +} diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c index a354ed948ca5f1..7953618d2f9d13 100644 --- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c +++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c @@ -127,6 +127,15 @@ void check__break() { // CHECK-MSVC: call void @llvm.aarch64.break(i32 0) // CHECK-LINUX: error: call to undeclared function '__break' +void check__hlt() { + __hlt(0); + __hlt(1, 2, 3, 4, 5); + int x = __hlt(0); +} + +// CHECK-MSVC: call void @llvm.aarch64.hlt(i32 0) +// CHECK-LINUX: error: call to undeclared function '__hlt' + unsigned __int64 check__getReg(void) { unsigned volatile __int64 reg; reg = __getReg(18); diff --git a/clang/test/CodeGen/constexpr-c23-internal-linkage.c b/clang/test/CodeGen/constexpr-c23-internal-linkage.c new file mode 100644 index 00000000000000..1236062272a2d5 --- /dev/null +++ b/clang/test/CodeGen/constexpr-c23-internal-linkage.c @@ -0,0 +1,18 @@ +/* + * RUN: %clang_cc1 -std=c23 -emit-llvm -o - %s | FileCheck %s + */ + +constexpr int var_int = 1; +constexpr char var_char = 'a'; +constexpr float var_float = 2.5; + +const int *p_i = &var_int; +const char *p_c = &var_char; +const float *p_f = &var_float; + +/* +CHECK: @var_int = internal constant i32 1{{.*}} +CHECK: @var_char = internal constant i8 97{{.*}} +CHECK: @var_float = internal constant float 2.5{{.*}} +*/ + diff --git 
a/clang/test/CodeGen/sanitize-metadata-ignorelist.c b/clang/test/CodeGen/sanitize-metadata-ignorelist.c index 24fb4fa62cc537..4dc8c0c35fefee 100644 --- a/clang/test/CodeGen/sanitize-metadata-ignorelist.c +++ b/clang/test/CodeGen/sanitize-metadata-ignorelist.c @@ -50,6 +50,6 @@ void bar() { __atomic_fetch_add(&y, 2, __ATOMIC_RELAXED); } -// ALLOW: __sanitizer_metadata_covered.module_ctor -// FUN: __sanitizer_metadata_covered.module_ctor -// SRC-NOT: __sanitizer_metadata_covered.module_ctor +// ALLOW: __sanitizer_metadata_covered2.module_ctor +// FUN: __sanitizer_metadata_covered2.module_ctor +// SRC-NOT: __sanitizer_metadata_covered{{.*}}.module_ctor diff --git a/clang/test/CodeGen/sanitize-metadata-nosanitize.c b/clang/test/CodeGen/sanitize-metadata-nosanitize.c index 6414956fb67967..388a3df547a734 100644 --- a/clang/test/CodeGen/sanitize-metadata-nosanitize.c +++ b/clang/test/CodeGen/sanitize-metadata-nosanitize.c @@ -2,13 +2,13 @@ // RUN: %clang_cc1 -O -fexperimental-sanitize-metadata=covered -fexperimental-sanitize-metadata=atomics -fexperimental-sanitize-metadata=uar -triple x86_64-gnu-linux -x c -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK //. 
-// CHECK: @__start_sanmd_covered = extern_weak hidden global ptr -// CHECK: @__stop_sanmd_covered = extern_weak hidden global ptr -// CHECK: @__start_sanmd_atomics = extern_weak hidden global ptr -// CHECK: @__stop_sanmd_atomics = extern_weak hidden global ptr -// CHECK: @llvm.used = appending global [4 x ptr] [ptr @__sanitizer_metadata_covered.module_ctor, ptr @__sanitizer_metadata_covered.module_dtor, ptr @__sanitizer_metadata_atomics.module_ctor, ptr @__sanitizer_metadata_atomics.module_dtor], section "llvm.metadata" -// CHECK: @llvm.global_ctors = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 2, ptr @__sanitizer_metadata_covered.module_ctor, ptr @__sanitizer_metadata_covered.module_ctor }, { i32, ptr, ptr } { i32 2, ptr @__sanitizer_metadata_atomics.module_ctor, ptr @__sanitizer_metadata_atomics.module_ctor }] -// CHECK: @llvm.global_dtors = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 2, ptr @__sanitizer_metadata_covered.module_dtor, ptr @__sanitizer_metadata_covered.module_dtor }, { i32, ptr, ptr } { i32 2, ptr @__sanitizer_metadata_atomics.module_dtor, ptr @__sanitizer_metadata_atomics.module_dtor }] +// CHECK: @__start_sanmd_covered2 = extern_weak hidden global ptr +// CHECK: @__stop_sanmd_covered2 = extern_weak hidden global ptr +// CHECK: @__start_sanmd_atomics2 = extern_weak hidden global ptr +// CHECK: @__stop_sanmd_atomics2 = extern_weak hidden global ptr +// CHECK: @llvm.used = appending global [4 x ptr] [ptr @__sanitizer_metadata_covered2.module_ctor, ptr @__sanitizer_metadata_covered2.module_dtor, ptr @__sanitizer_metadata_atomics2.module_ctor, ptr @__sanitizer_metadata_atomics2.module_dtor], section "llvm.metadata" +// CHECK: @llvm.global_ctors = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 2, ptr @__sanitizer_metadata_covered2.module_ctor, ptr @__sanitizer_metadata_covered2.module_ctor }, { i32, ptr, ptr } { i32 2, ptr @__sanitizer_metadata_atomics2.module_ctor, ptr 
@__sanitizer_metadata_atomics2.module_ctor }] +// CHECK: @llvm.global_dtors = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 2, ptr @__sanitizer_metadata_covered2.module_dtor, ptr @__sanitizer_metadata_covered2.module_dtor }, { i32, ptr, ptr } { i32 2, ptr @__sanitizer_metadata_atomics2.module_dtor, ptr @__sanitizer_metadata_atomics2.module_dtor }] //. // CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(write, argmem: none, inaccessiblemem: none) // CHECK-LABEL: define dso_local void @escape @@ -95,17 +95,17 @@ __attribute__((no_sanitize("all"))) int test_no_sanitize_all(int *x, int *y) { // CHECK: attributes #3 = { mustprogress nofree norecurse nounwind willreturn memory(write, argmem: readwrite, inaccessiblemem: none) "min-legal-vector-width"="0" "no-trapping-math"="true" "no_sanitize_thread" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } // CHECK: attributes #4 = { nounwind "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } //. -// CHECK: !2 = !{!"sanmd_covered!C", !3} +// CHECK: !2 = !{!"sanmd_covered2!C", !3} // CHECK: !3 = !{i64 0} -// CHECK: !4 = !{!"sanmd_covered!C", !5} +// CHECK: !4 = !{!"sanmd_covered2!C", !5} // CHECK: !5 = !{i64 3} // CHECK: !6 = !{!7, !7, i64 0} // CHECK: !7 = !{!"any pointer", !8, i64 0} // CHECK: !8 = !{!"omnipotent char", !9, i64 0} // CHECK: !9 = !{!"Simple C/C++ TBAA"} -// CHECK: !10 = !{!"sanmd_atomics!C"} +// CHECK: !10 = !{!"sanmd_atomics2!C"} // CHECK: !11 = !{!12, !12, i64 0} // CHECK: !12 = !{!"int", !8, i64 0} -// CHECK: !13 = !{!"sanmd_covered!C", !14} +// CHECK: !13 = !{!"sanmd_covered2!C", !14} // CHECK: !14 = !{i64 2} //. 
diff --git a/clang/test/CodeGen/sanitize-metadata.c b/clang/test/CodeGen/sanitize-metadata.c index 7e1de0c208845f..55cbc0a19108d2 100644 --- a/clang/test/CodeGen/sanitize-metadata.c +++ b/clang/test/CodeGen/sanitize-metadata.c @@ -1,10 +1,10 @@ // RUN: %clang_cc1 -O -fexperimental-sanitize-metadata=atomics -triple x86_64-gnu-linux -x c -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,ATOMICS // RUN: %clang_cc1 -O -fexperimental-sanitize-metadata=atomics -triple aarch64-gnu-linux -x c -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,ATOMICS -// CHECK: @__start_sanmd_atomics = extern_weak hidden global ptr -// CHECK: @__stop_sanmd_atomics = extern_weak hidden global ptr -// CHECK: @__start_sanmd_covered = extern_weak hidden global ptr -// CHECK: @__stop_sanmd_covered = extern_weak hidden global ptr +// CHECK: @__start_sanmd_atomics2 = extern_weak hidden global ptr +// CHECK: @__stop_sanmd_atomics2 = extern_weak hidden global ptr +// CHECK: @__start_sanmd_covered2 = extern_weak hidden global ptr +// CHECK: @__stop_sanmd_covered2 = extern_weak hidden global ptr int x, y; @@ -21,16 +21,16 @@ int atomics() { __atomic_fetch_add(&x, 1, __ATOMIC_RELAXED); return y; } -// ATOMICS-LABEL: __sanitizer_metadata_atomics.module_ctor -// ATOMICS: call void @__sanitizer_metadata_atomics_add(i32 2, ptr @__start_sanmd_atomics, ptr @__stop_sanmd_atomics) -// ATOMICS-LABEL: __sanitizer_metadata_atomics.module_dtor -// ATOMICS: call void @__sanitizer_metadata_atomics_del(i32 2, ptr @__start_sanmd_atomics, ptr @__stop_sanmd_atomics) +// ATOMICS-LABEL: __sanitizer_metadata_atomics2.module_ctor +// ATOMICS: call void @__sanitizer_metadata_atomics_add(i32 2, ptr @__start_sanmd_atomics2, ptr @__stop_sanmd_atomics2) +// ATOMICS-LABEL: __sanitizer_metadata_atomics2.module_dtor +// ATOMICS: call void @__sanitizer_metadata_atomics_del(i32 2, ptr @__start_sanmd_atomics2, ptr @__stop_sanmd_atomics2) -// CHECK-LABEL: __sanitizer_metadata_covered.module_ctor -// CHECK: call void 
@__sanitizer_metadata_covered_add(i32 2, ptr @__start_sanmd_covered, ptr @__stop_sanmd_covered) -// CHECK-LABEL: __sanitizer_metadata_covered.module_dtor -// CHECK: call void @__sanitizer_metadata_covered_del(i32 2, ptr @__start_sanmd_covered, ptr @__stop_sanmd_covered) +// CHECK-LABEL: __sanitizer_metadata_covered2.module_ctor +// CHECK: call void @__sanitizer_metadata_covered_add(i32 2, ptr @__start_sanmd_covered2, ptr @__stop_sanmd_covered2) +// CHECK-LABEL: __sanitizer_metadata_covered2.module_dtor +// CHECK: call void @__sanitizer_metadata_covered_del(i32 2, ptr @__start_sanmd_covered2, ptr @__stop_sanmd_covered2) -// ATOMICS: ![[ATOMICS_COVERED]] = !{!"sanmd_covered!C", ![[ATOMICS_COVERED_AUX:[0-9]+]]} +// ATOMICS: ![[ATOMICS_COVERED]] = !{!"sanmd_covered2!C", ![[ATOMICS_COVERED_AUX:[0-9]+]]} // ATOMICS: ![[ATOMICS_COVERED_AUX]] = !{i64 1} -// ATOMICS: ![[ATOMIC_OP]] = !{!"sanmd_atomics!C"} +// ATOMICS: ![[ATOMIC_OP]] = !{!"sanmd_atomics2!C"} diff --git a/clang/test/CodeGen/stdcall-fastcall.c b/clang/test/CodeGen/stdcall-fastcall.c index f6d86d24463f32..5014b7d48e5b9c 100644 --- a/clang/test/CodeGen/stdcall-fastcall.c +++ b/clang/test/CodeGen/stdcall-fastcall.c @@ -151,3 +151,9 @@ void bar13(long long a, int b, int c) { // CHECK: call x86_fastcallcc void @foo13(i64 noundef %{{.*}}, i32 inreg noundef %{{.*}}, i32 inreg noundef % foo13(a, b, c); } + +struct S2 __attribute__((fastcall)) foo14(int a) { + // CHECK-LABEL: define dso_local x86_fastcallcc void @foo14(ptr dead_on_unwind noalias writable sret(%struct.S2) align 4 %agg.result, i32 inreg noundef %a) + struct S2 r = {a}; + return r; +} diff --git a/clang/test/CodeGen/vectorcall.c b/clang/test/CodeGen/vectorcall.c index 71dc3b0b9585a6..cab7fc0972d7b0 100644 --- a/clang/test/CodeGen/vectorcall.c +++ b/clang/test/CodeGen/vectorcall.c @@ -90,7 +90,7 @@ struct HVA4 __vectorcall hva6(struct HVA4 a, struct HVA4 b) { return b;} // X64: define dso_local x86_vectorcallcc %struct.HVA4 @"\01hva6@@128"(%struct.HVA4 
inreg %a.coerce, ptr noundef %b) struct HVA5 __vectorcall hva7(void) {struct HVA5 a = {}; return a;} -// X86: define dso_local x86_vectorcallcc void @"\01hva7@@0"(ptr dead_on_unwind inreg noalias writable sret(%struct.HVA5) align 16 %agg.result) +// X86: define dso_local x86_vectorcallcc void @"\01hva7@@0"(ptr dead_on_unwind noalias writable sret(%struct.HVA5) align 16 %agg.result) // X64: define dso_local x86_vectorcallcc void @"\01hva7@@0"(ptr dead_on_unwind noalias writable sret(%struct.HVA5) align 16 %agg.result) v4f32 __vectorcall hva8(v4f32 a, v4f32 b, v4f32 c, v4f32 d, int e, v4f32 f) {return f;} diff --git a/clang/test/Driver/aarch64-v81a.c b/clang/test/Driver/aarch64-v81a.c index f3873b3ab1864e..e84652ec7f11e1 100644 --- a/clang/test/Driver/aarch64-v81a.c +++ b/clang/test/Driver/aarch64-v81a.c @@ -19,17 +19,3 @@ // RUN: %clang --target=arm64 -mlittle-endian -march=armv8.1a -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-GENERICV81A %s // RUN: %clang --target=arm64 -mlittle-endian -march=armv8.1-a -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-GENERICV81A %s // ARM64-GENERICV81A: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"{{.*}} "-target-feature" "+v8.1a"{{.*}} "-target-feature" "+neon" - -// ===== Architecture extensions ===== - -// RUN: %if aarch64-registered-target %{ %clang -target aarch64 -march=armv8.1-a --print-enabled-extensions | FileCheck -check-prefix=ARCH-EXTENSION --implicit-check-not FEAT_ %s %} -// ARCH-EXTENSION: FEAT_AdvSIMD -// ARCH-EXTENSION: FEAT_CRC32 -// ARCH-EXTENSION: FEAT_ETE -// ARCH-EXTENSION: FEAT_FP -// ARCH-EXTENSION: FEAT_LOR -// ARCH-EXTENSION: FEAT_LSE -// ARCH-EXTENSION: FEAT_PAN -// ARCH-EXTENSION: FEAT_RDM -// ARCH-EXTENSION: FEAT_TRBE -// ARCH-EXTENSION: FEAT_VHE diff --git a/clang/test/Driver/aarch64-v82a.c b/clang/test/Driver/aarch64-v82a.c index 318c270cad3966..9dd355934c1059 100644 --- a/clang/test/Driver/aarch64-v82a.c +++ b/clang/test/Driver/aarch64-v82a.c @@ -13,21 +13,3 @@ // RUN: %clang 
--target=aarch64_be -mbig-endian -march=armv8.2a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV82A-BE %s // RUN: %clang --target=aarch64_be -mbig-endian -march=armv8.2-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV82A-BE %s // GENERICV82A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+v8.2a"{{.*}} "-target-feature" "+neon" - -// ===== Architecture extensions ===== - -// RUN: %if aarch64-registered-target %{ %clang -target aarch64 -march=armv8.2-a --print-enabled-extensions | FileCheck -check-prefix=ARCH-EXTENSION --implicit-check-not FEAT_ %s %} -// ARCH-EXTENSION: FEAT_AdvSIMD -// ARCH-EXTENSION: FEAT_CRC32 -// ARCH-EXTENSION: FEAT_DPB -// ARCH-EXTENSION: FEAT_ETE -// ARCH-EXTENSION: FEAT_FP -// ARCH-EXTENSION: FEAT_LOR -// ARCH-EXTENSION: FEAT_LSE -// ARCH-EXTENSION: FEAT_PAN -// ARCH-EXTENSION: FEAT_PAN2 -// ARCH-EXTENSION: FEAT_RAS, FEAT_RASv1p1 -// ARCH-EXTENSION: FEAT_RDM -// ARCH-EXTENSION: FEAT_TRBE -// ARCH-EXTENSION: FEAT_UAO -// ARCH-EXTENSION: FEAT_VHE diff --git a/clang/test/Driver/aarch64-v83a.c b/clang/test/Driver/aarch64-v83a.c index f35c500ee0a9be..b0ff9fb3abc24c 100644 --- a/clang/test/Driver/aarch64-v83a.c +++ b/clang/test/Driver/aarch64-v83a.c @@ -13,26 +13,3 @@ // RUN: %clang --target=aarch64_be -mbig-endian -march=armv8.3a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV83A-BE %s // RUN: %clang --target=aarch64_be -mbig-endian -march=armv8.3-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV83A-BE %s // GENERICV83A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+v8.3a"{{.*}} "-target-feature" "+neon" - -// ===== Architecture extensions ===== - -// RUN: %if aarch64-registered-target %{ %clang -target aarch64 -march=armv8.3-a --print-enabled-extensions | FileCheck -check-prefix=ARCH-EXTENSION --implicit-check-not FEAT_ %s %} -// ARCH-EXTENSION: FEAT_AdvSIMD -// ARCH-EXTENSION: FEAT_CCIDX -// ARCH-EXTENSION: FEAT_CRC32 -// ARCH-EXTENSION: FEAT_DPB 
-// ARCH-EXTENSION: FEAT_ETE -// ARCH-EXTENSION: FEAT_FCMA -// ARCH-EXTENSION: FEAT_FP -// ARCH-EXTENSION: FEAT_JSCVT -// ARCH-EXTENSION: FEAT_LOR -// ARCH-EXTENSION: FEAT_LRCPC -// ARCH-EXTENSION: FEAT_LSE -// ARCH-EXTENSION: FEAT_PAN -// ARCH-EXTENSION: FEAT_PAN2 -// ARCH-EXTENSION: FEAT_PAuth -// ARCH-EXTENSION: FEAT_RAS, FEAT_RASv1p1 -// ARCH-EXTENSION: FEAT_RDM -// ARCH-EXTENSION: FEAT_TRBE -// ARCH-EXTENSION: FEAT_UAO -// ARCH-EXTENSION: FEAT_VHE diff --git a/clang/test/Driver/aarch64-v84a.c b/clang/test/Driver/aarch64-v84a.c index d72c79fc14cec5..030990bfe5131c 100644 --- a/clang/test/Driver/aarch64-v84a.c +++ b/clang/test/Driver/aarch64-v84a.c @@ -13,37 +13,3 @@ // RUN: %clang --target=aarch64_be -mbig-endian -march=armv8.4a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV84A-BE %s // RUN: %clang --target=aarch64_be -mbig-endian -march=armv8.4-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV84A-BE %s // GENERICV84A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+v8.4a"{{.*}} "-target-feature" "+neon" - -// ===== Architecture extensions ===== - -// RUN: %if aarch64-registered-target %{ %clang -target aarch64 -march=armv8.4-a --print-enabled-extensions | FileCheck -check-prefix=ARCH-EXTENSION --implicit-check-not FEAT_ %s %} -// ARCH-EXTENSION: FEAT_AMUv1 -// ARCH-EXTENSION: FEAT_AdvSIMD -// ARCH-EXTENSION: FEAT_CCIDX -// ARCH-EXTENSION: FEAT_CRC32 -// ARCH-EXTENSION: FEAT_DIT -// ARCH-EXTENSION: FEAT_DPB -// ARCH-EXTENSION: FEAT_DotProd -// ARCH-EXTENSION: FEAT_ETE -// ARCH-EXTENSION: FEAT_FCMA -// ARCH-EXTENSION: FEAT_FP -// ARCH-EXTENSION: FEAT_FlagM -// ARCH-EXTENSION: FEAT_JSCVT -// ARCH-EXTENSION: FEAT_LOR -// ARCH-EXTENSION: FEAT_LRCPC -// ARCH-EXTENSION: FEAT_LRCPC2 -// ARCH-EXTENSION: FEAT_LSE -// ARCH-EXTENSION: FEAT_LSE2 -// ARCH-EXTENSION: FEAT_MPAM -// ARCH-EXTENSION: FEAT_NV, FEAT_NV2 -// ARCH-EXTENSION: FEAT_PAN -// ARCH-EXTENSION: FEAT_PAN2 -// ARCH-EXTENSION: FEAT_PAuth -// ARCH-EXTENSION: 
FEAT_RAS, FEAT_RASv1p1 -// ARCH-EXTENSION: FEAT_RDM -// ARCH-EXTENSION: FEAT_SEL2 -// ARCH-EXTENSION: FEAT_TLBIOS, FEAT_TLBIRANGE -// ARCH-EXTENSION: FEAT_TRBE -// ARCH-EXTENSION: FEAT_TRF -// ARCH-EXTENSION: FEAT_UAO -// ARCH-EXTENSION: FEAT_VHE diff --git a/clang/test/Driver/aarch64-v85a.c b/clang/test/Driver/aarch64-v85a.c index 06c0989bc8e9b2..3e1e921dcc0133 100644 --- a/clang/test/Driver/aarch64-v85a.c +++ b/clang/test/Driver/aarch64-v85a.c @@ -13,45 +13,3 @@ // RUN: %clang --target=aarch64_be -mbig-endian -march=armv8.5a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV85A-BE %s // RUN: %clang --target=aarch64_be -mbig-endian -march=armv8.5-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV85A-BE %s // GENERICV85A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+v8.5a"{{.*}} "-target-feature" "+neon" - -// ===== Architecture extensions ===== - -// RUN: %if aarch64-registered-target %{ %clang -target aarch64 -march=armv8.5-a --print-enabled-extensions | FileCheck -check-prefix=ARCH-EXTENSION --implicit-check-not FEAT_ %s %} -// ARCH-EXTENSION: FEAT_AMUv1 -// ARCH-EXTENSION: FEAT_AdvSIMD -// ARCH-EXTENSION: FEAT_BTI -// ARCH-EXTENSION: FEAT_CCIDX -// ARCH-EXTENSION: FEAT_CRC32 -// ARCH-EXTENSION: FEAT_CSV2_2 -// ARCH-EXTENSION: FEAT_DIT -// ARCH-EXTENSION: FEAT_DPB -// ARCH-EXTENSION: FEAT_DPB2 -// ARCH-EXTENSION: FEAT_DotProd -// ARCH-EXTENSION: FEAT_ETE -// ARCH-EXTENSION: FEAT_FCMA -// ARCH-EXTENSION: FEAT_FP -// ARCH-EXTENSION: FEAT_FRINTTS -// ARCH-EXTENSION: FEAT_FlagM -// ARCH-EXTENSION: FEAT_FlagM2 -// ARCH-EXTENSION: FEAT_JSCVT -// ARCH-EXTENSION: FEAT_LOR -// ARCH-EXTENSION: FEAT_LRCPC -// ARCH-EXTENSION: FEAT_LRCPC2 -// ARCH-EXTENSION: FEAT_LSE -// ARCH-EXTENSION: FEAT_LSE2 -// ARCH-EXTENSION: FEAT_MPAM -// ARCH-EXTENSION: FEAT_NV, FEAT_NV2 -// ARCH-EXTENSION: FEAT_PAN -// ARCH-EXTENSION: FEAT_PAN2 -// ARCH-EXTENSION: FEAT_PAuth -// ARCH-EXTENSION: FEAT_RAS, FEAT_RASv1p1 -// ARCH-EXTENSION: FEAT_RDM -// 
ARCH-EXTENSION: FEAT_SB -// ARCH-EXTENSION: FEAT_SEL2 -// ARCH-EXTENSION: FEAT_SPECRES -// ARCH-EXTENSION: FEAT_SSBS, FEAT_SSBS2 -// ARCH-EXTENSION: FEAT_TLBIOS, FEAT_TLBIRANGE -// ARCH-EXTENSION: FEAT_TRBE -// ARCH-EXTENSION: FEAT_TRF -// ARCH-EXTENSION: FEAT_UAO -// ARCH-EXTENSION: FEAT_VHE diff --git a/clang/test/Driver/aarch64-v86a.c b/clang/test/Driver/aarch64-v86a.c index 04d372e762a37d..ba2b57979b5187 100644 --- a/clang/test/Driver/aarch64-v86a.c +++ b/clang/test/Driver/aarch64-v86a.c @@ -13,50 +13,3 @@ // RUN: %clang --target=aarch64_be -mbig-endian -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s // RUN: %clang --target=aarch64_be -mbig-endian -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s // GENERICV86A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+v8.6a"{{.*}} "-target-feature" "+neon" - -// ===== Architecture extensions ===== - -// RUN: %if aarch64-registered-target %{ %clang -target aarch64 -march=armv8.6-a --print-enabled-extensions | FileCheck -check-prefix=ARCH-EXTENSION --implicit-check-not FEAT_ %s %} -// ARCH-EXTENSION: FEAT_AMUv1 -// ARCH-EXTENSION: FEAT_AMUv1p1 -// ARCH-EXTENSION: FEAT_AdvSIMD -// ARCH-EXTENSION: FEAT_BF16 -// ARCH-EXTENSION: FEAT_BTI -// ARCH-EXTENSION: FEAT_CCIDX -// ARCH-EXTENSION: FEAT_CRC32 -// ARCH-EXTENSION: FEAT_CSV2_2 -// ARCH-EXTENSION: FEAT_DIT -// ARCH-EXTENSION: FEAT_DPB -// ARCH-EXTENSION: FEAT_DPB2 -// ARCH-EXTENSION: FEAT_DotProd -// ARCH-EXTENSION: FEAT_ECV -// ARCH-EXTENSION: FEAT_ETE -// ARCH-EXTENSION: FEAT_FCMA -// ARCH-EXTENSION: FEAT_FGT -// ARCH-EXTENSION: FEAT_FP -// ARCH-EXTENSION: FEAT_FRINTTS -// ARCH-EXTENSION: FEAT_FlagM -// ARCH-EXTENSION: FEAT_FlagM2 -// ARCH-EXTENSION: FEAT_I8MM -// ARCH-EXTENSION: FEAT_JSCVT -// ARCH-EXTENSION: FEAT_LOR -// ARCH-EXTENSION: FEAT_LRCPC -// ARCH-EXTENSION: FEAT_LRCPC2 -// ARCH-EXTENSION: FEAT_LSE -// ARCH-EXTENSION: FEAT_LSE2 -// ARCH-EXTENSION: FEAT_MPAM -// 
ARCH-EXTENSION: FEAT_NV, FEAT_NV2 -// ARCH-EXTENSION: FEAT_PAN -// ARCH-EXTENSION: FEAT_PAN2 -// ARCH-EXTENSION: FEAT_PAuth -// ARCH-EXTENSION: FEAT_RAS, FEAT_RASv1p1 -// ARCH-EXTENSION: FEAT_RDM -// ARCH-EXTENSION: FEAT_SB -// ARCH-EXTENSION: FEAT_SEL2 -// ARCH-EXTENSION: FEAT_SPECRES -// ARCH-EXTENSION: FEAT_SSBS, FEAT_SSBS2 -// ARCH-EXTENSION: FEAT_TLBIOS, FEAT_TLBIRANGE -// ARCH-EXTENSION: FEAT_TRBE -// ARCH-EXTENSION: FEAT_TRF -// ARCH-EXTENSION: FEAT_UAO -// ARCH-EXTENSION: FEAT_VHE diff --git a/clang/test/Driver/aarch64-v87a.c b/clang/test/Driver/aarch64-v87a.c index b385e7cfb2ad61..ee4b68882739a8 100644 --- a/clang/test/Driver/aarch64-v87a.c +++ b/clang/test/Driver/aarch64-v87a.c @@ -13,53 +13,3 @@ // RUN: %clang --target=aarch64_be -mbig-endian -march=armv8.7a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV87A-BE %s // RUN: %clang --target=aarch64_be -mbig-endian -march=armv8.7-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV87A-BE %s // GENERICV87A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+v8.7a"{{.*}} "-target-feature" "+neon" - -// ===== Architecture extensions ===== - -// RUN: %if aarch64-registered-target %{ %clang -target aarch64 -march=armv8.7-a --print-enabled-extensions | FileCheck -check-prefix=ARCH-EXTENSION --implicit-check-not FEAT_ %s %} -// ARCH-EXTENSION: FEAT_AMUv1 -// ARCH-EXTENSION: FEAT_AMUv1p1 -// ARCH-EXTENSION: FEAT_AdvSIMD -// ARCH-EXTENSION: FEAT_BF16 -// ARCH-EXTENSION: FEAT_BTI -// ARCH-EXTENSION: FEAT_CCIDX -// ARCH-EXTENSION: FEAT_CRC32 -// ARCH-EXTENSION: FEAT_CSV2_2 -// ARCH-EXTENSION: FEAT_DIT -// ARCH-EXTENSION: FEAT_DPB -// ARCH-EXTENSION: FEAT_DPB2 -// ARCH-EXTENSION: FEAT_DotProd -// ARCH-EXTENSION: FEAT_ECV -// ARCH-EXTENSION: FEAT_ETE -// ARCH-EXTENSION: FEAT_FCMA -// ARCH-EXTENSION: FEAT_FGT -// ARCH-EXTENSION: FEAT_FP -// ARCH-EXTENSION: FEAT_FRINTTS -// ARCH-EXTENSION: FEAT_FlagM -// ARCH-EXTENSION: FEAT_FlagM2 -// ARCH-EXTENSION: FEAT_HCX -// ARCH-EXTENSION: 
FEAT_I8MM -// ARCH-EXTENSION: FEAT_JSCVT -// ARCH-EXTENSION: FEAT_LOR -// ARCH-EXTENSION: FEAT_LRCPC -// ARCH-EXTENSION: FEAT_LRCPC2 -// ARCH-EXTENSION: FEAT_LSE -// ARCH-EXTENSION: FEAT_LSE2 -// ARCH-EXTENSION: FEAT_MPAM -// ARCH-EXTENSION: FEAT_NV, FEAT_NV2 -// ARCH-EXTENSION: FEAT_PAN -// ARCH-EXTENSION: FEAT_PAN2 -// ARCH-EXTENSION: FEAT_PAuth -// ARCH-EXTENSION: FEAT_RAS, FEAT_RASv1p1 -// ARCH-EXTENSION: FEAT_RDM -// ARCH-EXTENSION: FEAT_SB -// ARCH-EXTENSION: FEAT_SEL2 -// ARCH-EXTENSION: FEAT_SPECRES -// ARCH-EXTENSION: FEAT_SSBS, FEAT_SSBS2 -// ARCH-EXTENSION: FEAT_TLBIOS, FEAT_TLBIRANGE -// ARCH-EXTENSION: FEAT_TRBE -// ARCH-EXTENSION: FEAT_TRF -// ARCH-EXTENSION: FEAT_UAO -// ARCH-EXTENSION: FEAT_VHE -// ARCH-EXTENSION: FEAT_WFxT -// ARCH-EXTENSION: FEAT_XS diff --git a/clang/test/Driver/aarch64-v88a.c b/clang/test/Driver/aarch64-v88a.c index 438796b91fff61..b680c1f567134d 100644 --- a/clang/test/Driver/aarch64-v88a.c +++ b/clang/test/Driver/aarch64-v88a.c @@ -13,56 +13,3 @@ // RUN: %clang --target=aarch64_be -mbig-endian -march=armv8.8a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV88A-BE %s // RUN: %clang --target=aarch64_be -mbig-endian -march=armv8.8-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV88A-BE %s // GENERICV88A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+v8.8a"{{.*}} "-target-feature" "+neon" - -// ===== Architecture extensions ===== - -// RUN: %if aarch64-registered-target %{ %clang -target aarch64 -march=armv8.8-a --print-enabled-extensions | FileCheck -check-prefix=ARCH-EXTENSION --implicit-check-not FEAT_ %s %} -// ARCH-EXTENSION: FEAT_AMUv1 -// ARCH-EXTENSION: FEAT_AMUv1p1 -// ARCH-EXTENSION: FEAT_AdvSIMD -// ARCH-EXTENSION: FEAT_BF16 -// ARCH-EXTENSION: FEAT_BTI -// ARCH-EXTENSION: FEAT_CCIDX -// ARCH-EXTENSION: FEAT_CRC32 -// ARCH-EXTENSION: FEAT_CSV2_2 -// ARCH-EXTENSION: FEAT_DIT -// ARCH-EXTENSION: FEAT_DPB -// ARCH-EXTENSION: FEAT_DPB2 -// ARCH-EXTENSION: FEAT_DotProd -// 
ARCH-EXTENSION: FEAT_ECV -// ARCH-EXTENSION: FEAT_ETE -// ARCH-EXTENSION: FEAT_FCMA -// ARCH-EXTENSION: FEAT_FGT -// ARCH-EXTENSION: FEAT_FP -// ARCH-EXTENSION: FEAT_FRINTTS -// ARCH-EXTENSION: FEAT_FlagM -// ARCH-EXTENSION: FEAT_FlagM2 -// ARCH-EXTENSION: FEAT_HBC -// ARCH-EXTENSION: FEAT_HCX -// ARCH-EXTENSION: FEAT_I8MM -// ARCH-EXTENSION: FEAT_JSCVT -// ARCH-EXTENSION: FEAT_LOR -// ARCH-EXTENSION: FEAT_LRCPC -// ARCH-EXTENSION: FEAT_LRCPC2 -// ARCH-EXTENSION: FEAT_LSE -// ARCH-EXTENSION: FEAT_LSE2 -// ARCH-EXTENSION: FEAT_MOPS -// ARCH-EXTENSION: FEAT_MPAM -// ARCH-EXTENSION: FEAT_NMI, FEAT_GICv3_NMI -// ARCH-EXTENSION: FEAT_NV, FEAT_NV2 -// ARCH-EXTENSION: FEAT_PAN -// ARCH-EXTENSION: FEAT_PAN2 -// ARCH-EXTENSION: FEAT_PAuth -// ARCH-EXTENSION: FEAT_RAS, FEAT_RASv1p1 -// ARCH-EXTENSION: FEAT_RDM -// ARCH-EXTENSION: FEAT_SB -// ARCH-EXTENSION: FEAT_SEL2 -// ARCH-EXTENSION: FEAT_SPECRES -// ARCH-EXTENSION: FEAT_SSBS, FEAT_SSBS2 -// ARCH-EXTENSION: FEAT_TLBIOS, FEAT_TLBIRANGE -// ARCH-EXTENSION: FEAT_TRBE -// ARCH-EXTENSION: FEAT_TRF -// ARCH-EXTENSION: FEAT_UAO -// ARCH-EXTENSION: FEAT_VHE -// ARCH-EXTENSION: FEAT_WFxT -// ARCH-EXTENSION: FEAT_XS diff --git a/clang/test/Driver/aarch64-v89a.c b/clang/test/Driver/aarch64-v89a.c index 42bcb127cd3bd2..903b793d046ba6 100644 --- a/clang/test/Driver/aarch64-v89a.c +++ b/clang/test/Driver/aarch64-v89a.c @@ -12,62 +12,3 @@ // RUN: %clang --target=aarch64_be -mbig-endian -march=armv8.9a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV89A-BE %s // RUN: %clang --target=aarch64_be -mbig-endian -march=armv8.9-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV89A-BE %s // GENERICV89A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+v8.9a"{{.*}} "-target-feature" "+neon" - -// ===== Architecture extensions ===== - -// RUN: %if aarch64-registered-target %{ %clang -target aarch64 -march=armv8.9-a --print-enabled-extensions | FileCheck -check-prefix=ARCH-EXTENSION --implicit-check-not 
FEAT_ %s %} -// ARCH-EXTENSION: FEAT_AMUv1 -// ARCH-EXTENSION: FEAT_AMUv1p1 -// ARCH-EXTENSION: FEAT_AdvSIMD -// ARCH-EXTENSION: FEAT_BF16 -// ARCH-EXTENSION: FEAT_BTI -// ARCH-EXTENSION: FEAT_CCIDX -// ARCH-EXTENSION: FEAT_CHK -// ARCH-EXTENSION: FEAT_CLRBHB -// ARCH-EXTENSION: FEAT_CRC32 -// ARCH-EXTENSION: FEAT_CSSC -// ARCH-EXTENSION: FEAT_CSV2_2 -// ARCH-EXTENSION: FEAT_DIT -// ARCH-EXTENSION: FEAT_DPB -// ARCH-EXTENSION: FEAT_DPB2 -// ARCH-EXTENSION: FEAT_DotProd -// ARCH-EXTENSION: FEAT_ECV -// ARCH-EXTENSION: FEAT_ETE -// ARCH-EXTENSION: FEAT_FCMA -// ARCH-EXTENSION: FEAT_FGT -// ARCH-EXTENSION: FEAT_FP -// ARCH-EXTENSION: FEAT_FRINTTS -// ARCH-EXTENSION: FEAT_FlagM -// ARCH-EXTENSION: FEAT_FlagM2 -// ARCH-EXTENSION: FEAT_HBC -// ARCH-EXTENSION: FEAT_HCX -// ARCH-EXTENSION: FEAT_I8MM -// ARCH-EXTENSION: FEAT_JSCVT -// ARCH-EXTENSION: FEAT_LOR -// ARCH-EXTENSION: FEAT_LRCPC -// ARCH-EXTENSION: FEAT_LRCPC2 -// ARCH-EXTENSION: FEAT_LSE -// ARCH-EXTENSION: FEAT_LSE2 -// ARCH-EXTENSION: FEAT_MOPS -// ARCH-EXTENSION: FEAT_MPAM -// ARCH-EXTENSION: FEAT_NMI, FEAT_GICv3_NMI -// ARCH-EXTENSION: FEAT_NV, FEAT_NV2 -// ARCH-EXTENSION: FEAT_PAN -// ARCH-EXTENSION: FEAT_PAN2 -// ARCH-EXTENSION: FEAT_PAuth -// ARCH-EXTENSION: FEAT_PRFMSLC -// ARCH-EXTENSION: FEAT_RAS, FEAT_RASv1p1 -// ARCH-EXTENSION: FEAT_RASv2 -// ARCH-EXTENSION: FEAT_RDM -// ARCH-EXTENSION: FEAT_SB -// ARCH-EXTENSION: FEAT_SEL2 -// ARCH-EXTENSION: FEAT_SPECRES -// ARCH-EXTENSION: FEAT_SPECRES2 -// ARCH-EXTENSION: FEAT_SSBS, FEAT_SSBS2 -// ARCH-EXTENSION: FEAT_TLBIOS, FEAT_TLBIRANGE -// ARCH-EXTENSION: FEAT_TRBE -// ARCH-EXTENSION: FEAT_TRF -// ARCH-EXTENSION: FEAT_UAO -// ARCH-EXTENSION: FEAT_VHE -// ARCH-EXTENSION: FEAT_WFxT -// ARCH-EXTENSION: FEAT_XS diff --git a/clang/test/Driver/aarch64-v8a.c b/clang/test/Driver/aarch64-v8a.c index a3b01560b22a6f..d58e1c29cf2eb1 100644 --- a/clang/test/Driver/aarch64-v8a.c +++ b/clang/test/Driver/aarch64-v8a.c @@ -19,11 +19,3 @@ // RUN: %clang --target=arm64 
-mlittle-endian -march=armv8a -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-GENERICV8A %s // RUN: %clang --target=arm64 -mlittle-endian -march=armv8-a -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-GENERICV8A %s // ARM64-GENERICV8A: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"{{.*}} "-target-feature" "+v8a"{{.*}} "-target-feature" "+neon" - -// ===== Architecture extensions ===== - -// RUN: %if aarch64-registered-target %{ %clang -target aarch64 -march=armv8-a --print-enabled-extensions | FileCheck -check-prefix=ARCH-EXTENSION --implicit-check-not FEAT_ %s %} -// ARCH-EXTENSION: FEAT_AdvSIMD -// ARCH-EXTENSION: FEAT_ETE -// ARCH-EXTENSION: FEAT_FP -// ARCH-EXTENSION: FEAT_TRBE diff --git a/clang/test/Driver/aarch64-v91a.c b/clang/test/Driver/aarch64-v91a.c index c5667f8fb3bcdc..80853a59d01537 100644 --- a/clang/test/Driver/aarch64-v91a.c +++ b/clang/test/Driver/aarch64-v91a.c @@ -13,55 +13,3 @@ // RUN: %clang --target=aarch64_be -mbig-endian -march=armv9.1a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV91A-BE %s // RUN: %clang --target=aarch64_be -mbig-endian -march=armv9.1-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV91A-BE %s // GENERICV91A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.1a"{{.*}} "-target-feature" "+sve" "-target-feature" "+sve2" - -// ===== Architecture extensions ===== - -// RUN: %if aarch64-registered-target %{ %clang -target aarch64 -march=armv9.1-a --print-enabled-extensions | FileCheck -check-prefix=ARCH-EXTENSION --implicit-check-not FEAT_ %s %} -// ARCH-EXTENSION: FEAT_AMUv1 -// ARCH-EXTENSION: FEAT_AMUv1p1 -// ARCH-EXTENSION: FEAT_AdvSIMD -// ARCH-EXTENSION: FEAT_BF16 -// ARCH-EXTENSION: FEAT_BTI -// ARCH-EXTENSION: FEAT_CCIDX -// ARCH-EXTENSION: FEAT_CRC32 -// ARCH-EXTENSION: FEAT_CSV2_2 -// ARCH-EXTENSION: FEAT_DIT -// ARCH-EXTENSION: FEAT_DPB -// ARCH-EXTENSION: FEAT_DPB2 -// ARCH-EXTENSION: FEAT_DotProd -// ARCH-EXTENSION: FEAT_ECV -// ARCH-EXTENSION: 
FEAT_ETE -// ARCH-EXTENSION: FEAT_FCMA -// ARCH-EXTENSION: FEAT_FGT -// ARCH-EXTENSION: FEAT_FP -// ARCH-EXTENSION: FEAT_FP16 -// ARCH-EXTENSION: FEAT_FRINTTS -// ARCH-EXTENSION: FEAT_FlagM -// ARCH-EXTENSION: FEAT_FlagM2 -// ARCH-EXTENSION: FEAT_I8MM -// ARCH-EXTENSION: FEAT_JSCVT -// ARCH-EXTENSION: FEAT_LOR -// ARCH-EXTENSION: FEAT_LRCPC -// ARCH-EXTENSION: FEAT_LRCPC2 -// ARCH-EXTENSION: FEAT_LSE -// ARCH-EXTENSION: FEAT_LSE2 -// ARCH-EXTENSION: FEAT_MEC -// ARCH-EXTENSION: FEAT_MPAM -// ARCH-EXTENSION: FEAT_NV, FEAT_NV2 -// ARCH-EXTENSION: FEAT_PAN -// ARCH-EXTENSION: FEAT_PAN2 -// ARCH-EXTENSION: FEAT_PAuth -// ARCH-EXTENSION: FEAT_RAS, FEAT_RASv1p1 -// ARCH-EXTENSION: FEAT_RDM -// ARCH-EXTENSION: FEAT_RME -// ARCH-EXTENSION: FEAT_SB -// ARCH-EXTENSION: FEAT_SEL2 -// ARCH-EXTENSION: FEAT_SPECRES -// ARCH-EXTENSION: FEAT_SSBS, FEAT_SSBS2 -// ARCH-EXTENSION: FEAT_SVE -// ARCH-EXTENSION: FEAT_SVE2 -// ARCH-EXTENSION: FEAT_TLBIOS, FEAT_TLBIRANGE -// ARCH-EXTENSION: FEAT_TRBE -// ARCH-EXTENSION: FEAT_TRF -// ARCH-EXTENSION: FEAT_UAO -// ARCH-EXTENSION: FEAT_VHE diff --git a/clang/test/Driver/aarch64-v92a.c b/clang/test/Driver/aarch64-v92a.c index 6088a69312dc4c..ee644cc6f3c620 100644 --- a/clang/test/Driver/aarch64-v92a.c +++ b/clang/test/Driver/aarch64-v92a.c @@ -13,58 +13,3 @@ // RUN: %clang --target=aarch64_be -mbig-endian -march=armv9.2a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV92A-BE %s // RUN: %clang --target=aarch64_be -mbig-endian -march=armv9.2-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV92A-BE %s // GENERICV92A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.2a"{{.*}} "-target-feature" "+sve" "-target-feature" "+sve2" - -// ===== Architecture extensions ===== - -// RUN: %if aarch64-registered-target %{ %clang -target aarch64 -march=armv9.2-a --print-enabled-extensions | FileCheck -check-prefix=ARCH-EXTENSION --implicit-check-not FEAT_ %s %} -// ARCH-EXTENSION: FEAT_AMUv1 -// ARCH-EXTENSION: 
FEAT_AMUv1p1 -// ARCH-EXTENSION: FEAT_AdvSIMD -// ARCH-EXTENSION: FEAT_BF16 -// ARCH-EXTENSION: FEAT_BTI -// ARCH-EXTENSION: FEAT_CCIDX -// ARCH-EXTENSION: FEAT_CRC32 -// ARCH-EXTENSION: FEAT_CSV2_2 -// ARCH-EXTENSION: FEAT_DIT -// ARCH-EXTENSION: FEAT_DPB -// ARCH-EXTENSION: FEAT_DPB2 -// ARCH-EXTENSION: FEAT_DotProd -// ARCH-EXTENSION: FEAT_ECV -// ARCH-EXTENSION: FEAT_ETE -// ARCH-EXTENSION: FEAT_FCMA -// ARCH-EXTENSION: FEAT_FGT -// ARCH-EXTENSION: FEAT_FP -// ARCH-EXTENSION: FEAT_FP16 -// ARCH-EXTENSION: FEAT_FRINTTS -// ARCH-EXTENSION: FEAT_FlagM -// ARCH-EXTENSION: FEAT_FlagM2 -// ARCH-EXTENSION: FEAT_HCX -// ARCH-EXTENSION: FEAT_I8MM -// ARCH-EXTENSION: FEAT_JSCVT -// ARCH-EXTENSION: FEAT_LOR -// ARCH-EXTENSION: FEAT_LRCPC -// ARCH-EXTENSION: FEAT_LRCPC2 -// ARCH-EXTENSION: FEAT_LSE -// ARCH-EXTENSION: FEAT_LSE2 -// ARCH-EXTENSION: FEAT_MEC -// ARCH-EXTENSION: FEAT_MPAM -// ARCH-EXTENSION: FEAT_NV, FEAT_NV2 -// ARCH-EXTENSION: FEAT_PAN -// ARCH-EXTENSION: FEAT_PAN2 -// ARCH-EXTENSION: FEAT_PAuth -// ARCH-EXTENSION: FEAT_RAS, FEAT_RASv1p1 -// ARCH-EXTENSION: FEAT_RDM -// ARCH-EXTENSION: FEAT_RME -// ARCH-EXTENSION: FEAT_SB -// ARCH-EXTENSION: FEAT_SEL2 -// ARCH-EXTENSION: FEAT_SPECRES -// ARCH-EXTENSION: FEAT_SSBS, FEAT_SSBS2 -// ARCH-EXTENSION: FEAT_SVE -// ARCH-EXTENSION: FEAT_SVE2 -// ARCH-EXTENSION: FEAT_TLBIOS, FEAT_TLBIRANGE -// ARCH-EXTENSION: FEAT_TRBE -// ARCH-EXTENSION: FEAT_TRF -// ARCH-EXTENSION: FEAT_UAO -// ARCH-EXTENSION: FEAT_VHE -// ARCH-EXTENSION: FEAT_WFxT -// ARCH-EXTENSION: FEAT_XS diff --git a/clang/test/Driver/aarch64-v93a.c b/clang/test/Driver/aarch64-v93a.c index 5db3034078ea80..817559e28ccf47 100644 --- a/clang/test/Driver/aarch64-v93a.c +++ b/clang/test/Driver/aarch64-v93a.c @@ -13,61 +13,3 @@ // RUN: %clang --target=aarch64_be -mbig-endian -march=armv9.3a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV93A-BE %s // RUN: %clang --target=aarch64_be -mbig-endian -march=armv9.3-a -### -c %s 2>&1 | FileCheck 
-check-prefix=GENERICV93A-BE %s // GENERICV93A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.3a" - -// ===== Architecture extensions ===== - -// RUN: %if aarch64-registered-target %{ %clang -target aarch64 -march=armv9.3-a --print-enabled-extensions | FileCheck -check-prefix=ARCH-EXTENSION --implicit-check-not FEAT_ %s %} -// ARCH-EXTENSION: FEAT_AMUv1 -// ARCH-EXTENSION: FEAT_AMUv1p1 -// ARCH-EXTENSION: FEAT_AdvSIMD -// ARCH-EXTENSION: FEAT_BF16 -// ARCH-EXTENSION: FEAT_BTI -// ARCH-EXTENSION: FEAT_CCIDX -// ARCH-EXTENSION: FEAT_CRC32 -// ARCH-EXTENSION: FEAT_CSV2_2 -// ARCH-EXTENSION: FEAT_DIT -// ARCH-EXTENSION: FEAT_DPB -// ARCH-EXTENSION: FEAT_DPB2 -// ARCH-EXTENSION: FEAT_DotProd -// ARCH-EXTENSION: FEAT_ECV -// ARCH-EXTENSION: FEAT_ETE -// ARCH-EXTENSION: FEAT_FCMA -// ARCH-EXTENSION: FEAT_FGT -// ARCH-EXTENSION: FEAT_FP -// ARCH-EXTENSION: FEAT_FP16 -// ARCH-EXTENSION: FEAT_FRINTTS -// ARCH-EXTENSION: FEAT_FlagM -// ARCH-EXTENSION: FEAT_FlagM2 -// ARCH-EXTENSION: FEAT_HBC -// ARCH-EXTENSION: FEAT_HCX -// ARCH-EXTENSION: FEAT_I8MM -// ARCH-EXTENSION: FEAT_JSCVT -// ARCH-EXTENSION: FEAT_LOR -// ARCH-EXTENSION: FEAT_LRCPC -// ARCH-EXTENSION: FEAT_LRCPC2 -// ARCH-EXTENSION: FEAT_LSE -// ARCH-EXTENSION: FEAT_LSE2 -// ARCH-EXTENSION: FEAT_MEC -// ARCH-EXTENSION: FEAT_MOPS -// ARCH-EXTENSION: FEAT_MPAM -// ARCH-EXTENSION: FEAT_NMI, FEAT_GICv3_NMI -// ARCH-EXTENSION: FEAT_NV, FEAT_NV2 -// ARCH-EXTENSION: FEAT_PAN -// ARCH-EXTENSION: FEAT_PAN2 -// ARCH-EXTENSION: FEAT_PAuth -// ARCH-EXTENSION: FEAT_RAS, FEAT_RASv1p1 -// ARCH-EXTENSION: FEAT_RDM -// ARCH-EXTENSION: FEAT_RME -// ARCH-EXTENSION: FEAT_SB -// ARCH-EXTENSION: FEAT_SEL2 -// ARCH-EXTENSION: FEAT_SPECRES -// ARCH-EXTENSION: FEAT_SSBS, FEAT_SSBS2 -// ARCH-EXTENSION: FEAT_SVE -// ARCH-EXTENSION: FEAT_SVE2 -// ARCH-EXTENSION: FEAT_TLBIOS, FEAT_TLBIRANGE -// ARCH-EXTENSION: FEAT_TRBE -// ARCH-EXTENSION: FEAT_TRF -// ARCH-EXTENSION: FEAT_UAO -// ARCH-EXTENSION: 
FEAT_VHE -// ARCH-EXTENSION: FEAT_WFxT -// ARCH-EXTENSION: FEAT_XS diff --git a/clang/test/Driver/aarch64-v94a.c b/clang/test/Driver/aarch64-v94a.c index d9f991fc95d3d5..9998cc8a4a2160 100644 --- a/clang/test/Driver/aarch64-v94a.c +++ b/clang/test/Driver/aarch64-v94a.c @@ -13,67 +13,3 @@ // RUN: %clang --target=aarch64_be -mbig-endian -march=armv9.4a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV94A-BE %s // RUN: %clang --target=aarch64_be -mbig-endian -march=armv9.4-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV94A-BE %s // GENERICV94A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.4a" - -// ===== Architecture extensions ===== - -// RUN: %if aarch64-registered-target %{ %clang -target aarch64 -march=armv9.4-a --print-enabled-extensions | FileCheck -check-prefix=ARCH-EXTENSION --implicit-check-not FEAT_ %s %} -// ARCH-EXTENSION: FEAT_AMUv1 -// ARCH-EXTENSION: FEAT_AMUv1p1 -// ARCH-EXTENSION: FEAT_AdvSIMD -// ARCH-EXTENSION: FEAT_BF16 -// ARCH-EXTENSION: FEAT_BTI -// ARCH-EXTENSION: FEAT_CCIDX -// ARCH-EXTENSION: FEAT_CHK -// ARCH-EXTENSION: FEAT_CLRBHB -// ARCH-EXTENSION: FEAT_CRC32 -// ARCH-EXTENSION: FEAT_CSSC -// ARCH-EXTENSION: FEAT_CSV2_2 -// ARCH-EXTENSION: FEAT_DIT -// ARCH-EXTENSION: FEAT_DPB -// ARCH-EXTENSION: FEAT_DPB2 -// ARCH-EXTENSION: FEAT_DotProd -// ARCH-EXTENSION: FEAT_ECV -// ARCH-EXTENSION: FEAT_ETE -// ARCH-EXTENSION: FEAT_FCMA -// ARCH-EXTENSION: FEAT_FGT -// ARCH-EXTENSION: FEAT_FP -// ARCH-EXTENSION: FEAT_FP16 -// ARCH-EXTENSION: FEAT_FRINTTS -// ARCH-EXTENSION: FEAT_FlagM -// ARCH-EXTENSION: FEAT_FlagM2 -// ARCH-EXTENSION: FEAT_HBC -// ARCH-EXTENSION: FEAT_HCX -// ARCH-EXTENSION: FEAT_I8MM -// ARCH-EXTENSION: FEAT_JSCVT -// ARCH-EXTENSION: FEAT_LOR -// ARCH-EXTENSION: FEAT_LRCPC -// ARCH-EXTENSION: FEAT_LRCPC2 -// ARCH-EXTENSION: FEAT_LSE -// ARCH-EXTENSION: FEAT_LSE2 -// ARCH-EXTENSION: FEAT_MEC -// ARCH-EXTENSION: FEAT_MOPS -// ARCH-EXTENSION: FEAT_MPAM -// ARCH-EXTENSION: FEAT_NMI, 
FEAT_GICv3_NMI -// ARCH-EXTENSION: FEAT_NV, FEAT_NV2 -// ARCH-EXTENSION: FEAT_PAN -// ARCH-EXTENSION: FEAT_PAN2 -// ARCH-EXTENSION: FEAT_PAuth -// ARCH-EXTENSION: FEAT_PRFMSLC -// ARCH-EXTENSION: FEAT_RAS, FEAT_RASv1p1 -// ARCH-EXTENSION: FEAT_RASv2 -// ARCH-EXTENSION: FEAT_RDM -// ARCH-EXTENSION: FEAT_RME -// ARCH-EXTENSION: FEAT_SB -// ARCH-EXTENSION: FEAT_SEL2 -// ARCH-EXTENSION: FEAT_SPECRES -// ARCH-EXTENSION: FEAT_SPECRES2 -// ARCH-EXTENSION: FEAT_SSBS, FEAT_SSBS2 -// ARCH-EXTENSION: FEAT_SVE -// ARCH-EXTENSION: FEAT_SVE2 -// ARCH-EXTENSION: FEAT_TLBIOS, FEAT_TLBIRANGE -// ARCH-EXTENSION: FEAT_TRBE -// ARCH-EXTENSION: FEAT_TRF -// ARCH-EXTENSION: FEAT_UAO -// ARCH-EXTENSION: FEAT_VHE -// ARCH-EXTENSION: FEAT_WFxT -// ARCH-EXTENSION: FEAT_XS diff --git a/clang/test/Driver/aarch64-v95a.c b/clang/test/Driver/aarch64-v95a.c index e3e97d2bef13f9..62878f2127626e 100644 --- a/clang/test/Driver/aarch64-v95a.c +++ b/clang/test/Driver/aarch64-v95a.c @@ -25,70 +25,3 @@ // RUN: %clang -target aarch64 -march=armv9.5a+tlbiw -### -c %s 2>&1 | FileCheck -check-prefix=V95A-TLBIW %s // RUN: %clang -target aarch64 -march=armv9.5-a+tlbiw -### -c %s 2>&1 | FileCheck -check-prefix=V95A-TLBIW %s // V95A-TLBIW: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.5a"{{.*}} "-target-feature" "+tlbiw" - -// ===== Architecture extensions ===== - -// RUN: %if aarch64-registered-target %{ %clang -target aarch64 -march=armv9.5-a --print-enabled-extensions | FileCheck -check-prefix=ARCH-EXTENSION --implicit-check-not FEAT_ %s %} -// ARCH-EXTENSION: FEAT_AMUv1 -// ARCH-EXTENSION: FEAT_AMUv1p1 -// ARCH-EXTENSION: FEAT_AdvSIMD -// ARCH-EXTENSION: FEAT_BF16 -// ARCH-EXTENSION: FEAT_BTI -// ARCH-EXTENSION: FEAT_CCIDX -// ARCH-EXTENSION: FEAT_CHK -// ARCH-EXTENSION: FEAT_CLRBHB -// ARCH-EXTENSION: FEAT_CPA -// ARCH-EXTENSION: FEAT_CRC32 -// ARCH-EXTENSION: FEAT_CSSC -// ARCH-EXTENSION: FEAT_CSV2_2 -// ARCH-EXTENSION: FEAT_DIT -// ARCH-EXTENSION: FEAT_DPB -// 
ARCH-EXTENSION: FEAT_DPB2 -// ARCH-EXTENSION: FEAT_DotProd -// ARCH-EXTENSION: FEAT_ECV -// ARCH-EXTENSION: FEAT_ETE -// ARCH-EXTENSION: FEAT_FAMINMAX -// ARCH-EXTENSION: FEAT_FCMA -// ARCH-EXTENSION: FEAT_FGT -// ARCH-EXTENSION: FEAT_FP -// ARCH-EXTENSION: FEAT_FP16 -// ARCH-EXTENSION: FEAT_FRINTTS -// ARCH-EXTENSION: FEAT_FlagM -// ARCH-EXTENSION: FEAT_FlagM2 -// ARCH-EXTENSION: FEAT_HBC -// ARCH-EXTENSION: FEAT_HCX -// ARCH-EXTENSION: FEAT_I8MM -// ARCH-EXTENSION: FEAT_JSCVT -// ARCH-EXTENSION: FEAT_LOR -// ARCH-EXTENSION: FEAT_LRCPC -// ARCH-EXTENSION: FEAT_LRCPC2 -// ARCH-EXTENSION: FEAT_LSE -// ARCH-EXTENSION: FEAT_LSE2 -// ARCH-EXTENSION: FEAT_LUT -// ARCH-EXTENSION: FEAT_MEC -// ARCH-EXTENSION: FEAT_MOPS -// ARCH-EXTENSION: FEAT_MPAM -// ARCH-EXTENSION: FEAT_NMI, FEAT_GICv3_NMI -// ARCH-EXTENSION: FEAT_NV, FEAT_NV2 -// ARCH-EXTENSION: FEAT_PAN -// ARCH-EXTENSION: FEAT_PAN2 -// ARCH-EXTENSION: FEAT_PAuth -// ARCH-EXTENSION: FEAT_PRFMSLC -// ARCH-EXTENSION: FEAT_RAS, FEAT_RASv1p1 -// ARCH-EXTENSION: FEAT_RASv2 -// ARCH-EXTENSION: FEAT_RDM -// ARCH-EXTENSION: FEAT_RME -// ARCH-EXTENSION: FEAT_SB -// ARCH-EXTENSION: FEAT_SEL2 -// ARCH-EXTENSION: FEAT_SPECRES -// ARCH-EXTENSION: FEAT_SPECRES2 -// ARCH-EXTENSION: FEAT_SSBS, FEAT_SSBS2 -// ARCH-EXTENSION: FEAT_SVE -// ARCH-EXTENSION: FEAT_SVE2 -// ARCH-EXTENSION: FEAT_TLBIOS, FEAT_TLBIRANGE -// ARCH-EXTENSION: FEAT_TRBE -// ARCH-EXTENSION: FEAT_TRF -// ARCH-EXTENSION: FEAT_UAO -// ARCH-EXTENSION: FEAT_VHE -// ARCH-EXTENSION: FEAT_WFxT -// ARCH-EXTENSION: FEAT_XS diff --git a/clang/test/Driver/aarch64-v9a.c b/clang/test/Driver/aarch64-v9a.c index f85e1c409010d5..6732f51996d47f 100644 --- a/clang/test/Driver/aarch64-v9a.c +++ b/clang/test/Driver/aarch64-v9a.c @@ -13,50 +13,3 @@ // RUN: %clang --target=aarch64_be -mbig-endian -march=armv9a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV9A-BE %s // RUN: %clang --target=aarch64_be -mbig-endian -march=armv9-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV9A-BE %s 
// GENERICV9A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9a"{{.*}} "-target-feature" "+sve" "-target-feature" "+sve2" - -// ===== Architecture extensions ===== - -// RUN: %if aarch64-registered-target %{ %clang -target aarch64 -march=armv9-a --print-enabled-extensions | FileCheck -check-prefix=ARCH-EXTENSION --implicit-check-not FEAT_ %s %} -// ARCH-EXTENSION: FEAT_AMUv1 -// ARCH-EXTENSION: FEAT_AdvSIMD -// ARCH-EXTENSION: FEAT_BTI -// ARCH-EXTENSION: FEAT_CCIDX -// ARCH-EXTENSION: FEAT_CRC32 -// ARCH-EXTENSION: FEAT_CSV2_2 -// ARCH-EXTENSION: FEAT_DIT -// ARCH-EXTENSION: FEAT_DPB -// ARCH-EXTENSION: FEAT_DPB2 -// ARCH-EXTENSION: FEAT_DotProd -// ARCH-EXTENSION: FEAT_ETE -// ARCH-EXTENSION: FEAT_FCMA -// ARCH-EXTENSION: FEAT_FP -// ARCH-EXTENSION: FEAT_FP16 -// ARCH-EXTENSION: FEAT_FRINTTS -// ARCH-EXTENSION: FEAT_FlagM -// ARCH-EXTENSION: FEAT_FlagM2 -// ARCH-EXTENSION: FEAT_JSCVT -// ARCH-EXTENSION: FEAT_LOR -// ARCH-EXTENSION: FEAT_LRCPC -// ARCH-EXTENSION: FEAT_LRCPC2 -// ARCH-EXTENSION: FEAT_LSE -// ARCH-EXTENSION: FEAT_LSE2 -// ARCH-EXTENSION: FEAT_MEC -// ARCH-EXTENSION: FEAT_MPAM -// ARCH-EXTENSION: FEAT_NV, FEAT_NV2 -// ARCH-EXTENSION: FEAT_PAN -// ARCH-EXTENSION: FEAT_PAN2 -// ARCH-EXTENSION: FEAT_PAuth -// ARCH-EXTENSION: FEAT_RAS, FEAT_RASv1p1 -// ARCH-EXTENSION: FEAT_RDM -// ARCH-EXTENSION: FEAT_RME -// ARCH-EXTENSION: FEAT_SB -// ARCH-EXTENSION: FEAT_SEL2 -// ARCH-EXTENSION: FEAT_SPECRES -// ARCH-EXTENSION: FEAT_SSBS, FEAT_SSBS2 -// ARCH-EXTENSION: FEAT_SVE -// ARCH-EXTENSION: FEAT_SVE2 -// ARCH-EXTENSION: FEAT_TLBIOS, FEAT_TLBIRANGE -// ARCH-EXTENSION: FEAT_TRBE -// ARCH-EXTENSION: FEAT_TRF -// ARCH-EXTENSION: FEAT_UAO -// ARCH-EXTENSION: FEAT_VHE diff --git a/clang/test/Driver/frame-pointer.c b/clang/test/Driver/frame-pointer.c index 2b4287bf447ca8..2015fa520c2a2e 100644 --- a/clang/test/Driver/frame-pointer.c +++ b/clang/test/Driver/frame-pointer.c @@ -4,6 +4,9 @@ // RUN: %clang --target=i386-pc-linux 
-### -S -O3 %s 2>&1 | FileCheck -check-prefix=CHECK3-32 %s // RUN: %clang --target=i386-pc-linux -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECKs-32 %s +// RUN: %clang --target=i386-linux-android -### -S -O0 %s 2>&1 | FileCheck -check-prefix=CHECK-ANDROID %s +// RUN: %clang --target=i386-linux-android -### -S -O1 %s 2>&1 | FileCheck -check-prefix=CHECK-ANDROID %s +// RUN: %clang --target=i386-linux-android -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECK-ANDROID %s // RUN: %clang --target=x86_64-pc-linux -### -S -O0 %s 2>&1 | FileCheck -check-prefix=CHECK0-64 %s // RUN: %clang --target=x86_64-pc-linux -### -S -O1 %s 2>&1 | FileCheck -check-prefix=CHECK1-64 %s @@ -12,6 +15,10 @@ // RUN: %clang --target=x86_64-pc-linux -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECKs-64 %s // RUN: %clang --target=x86_64-pc-win32-macho -### -S -O3 %s 2>&1 | FileCheck -check-prefix=CHECK-MACHO-64 %s +// RUN: %clang --target=x86_64-linux-android -### -S -O0 %s 2>&1 | FileCheck -check-prefix=CHECK-ANDROID %s +// RUN: %clang --target=x86_64-linux-android -### -S -O1 %s 2>&1 | FileCheck -check-prefix=CHECK-ANDROID %s +// RUN: %clang --target=x86_64-linux-android -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECK-ANDROID %s + // Trust the above to get the optimizations right, and just test other targets // that want this by default. 
// RUN: %clang --target=s390x-pc-linux -### -S -O0 %s 2>&1 | FileCheck -check-prefix=CHECK0-64 %s @@ -57,9 +64,9 @@ // RUN: %clang --target=riscv64-unknown-linux-gnu -### -S -O3 %s 2>&1 | FileCheck -check-prefix=CHECK3-64 %s // RUN: %clang --target=riscv64-unknown-linux-gnu -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECKs-64 %s -// RUN: %clang --target=riscv64-linux-android -### -S -O0 %s 2>&1 | FileCheck -check-prefix=CHECK-ANDROID-64 %s -// RUN: %clang --target=riscv64-linux-android -### -S -O1 %s 2>&1 | FileCheck -check-prefix=CHECK-ANDROID-64 %s -// RUN: %clang --target=riscv64-linux-android -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECK-ANDROID-64 %s +// RUN: %clang --target=riscv64-linux-android -### -S -O0 %s 2>&1 | FileCheck -check-prefix=CHECK-ANDROID %s +// RUN: %clang --target=riscv64-linux-android -### -S -O1 %s 2>&1 | FileCheck -check-prefix=CHECK-ANDROID %s +// RUN: %clang --target=riscv64-linux-android -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECK-ANDROID %s // RUN: %clang --target=loongarch32 -### -S -O0 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK0-32 %s // RUN: %clang --target=loongarch32 -### -S -O1 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK1-32 %s @@ -86,4 +93,4 @@ // CHECKs-64-NOT: -mframe-pointer=all // CHECK-MACHO-64: -mframe-pointer=all -// CHECK-ANDROID-64: -mframe-pointer=non-leaf +// CHECK-ANDROID: -mframe-pointer=non-leaf diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-a64fx.c b/clang/test/Driver/print-enabled-extensions/aarch64-a64fx.c new file mode 100644 index 00000000000000..fc7bd14a70945c --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-a64fx.c @@ -0,0 +1,24 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=a64fx | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// 
CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-ampere1.c b/clang/test/Driver/print-enabled-extensions/aarch64-ampere1.c new file mode 100644 index 00000000000000..1001a308fe9e75 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-ampere1.c @@ -0,0 +1,54 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=ampere1 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors 
extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) 
atomicity rules +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RNG Enable Random Number generation instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-ampere1a.c b/clang/test/Driver/print-enabled-extensions/aarch64-ampere1a.c new file mode 100644 index 00000000000000..15613e7ca7a18a --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-ampere1a.c @@ -0,0 +1,56 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=ampere1a | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the 
given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A 
RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RNG Enable Random Number generation instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support +// CHECK-NEXT: FEAT_SM4, FEAT_SM3 Enable SM3 and SM4 support +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-ampere1b.c b/clang/test/Driver/print-enabled-extensions/aarch64-ampere1b.c new file mode 100644 index 
00000000000000..73520cc7c18a18 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-ampere1b.c @@ -0,0 +1,60 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=ampere1b | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSSC Enable Common Short Sequence Compression (CSSC) instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag 
Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RNG Enable Random Number generation instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support +// CHECK-NEXT: FEAT_SM4, FEAT_SM3 Enable SM3 and SM4 support +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: 
FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a10.c b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a10.c new file mode 100644 index 00000000000000..10da7640a1ea58 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a10.c @@ -0,0 +1,16 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=apple-a10 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a11.c b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a11.c new file mode 100644 index 00000000000000..05a547599c95c7 --- /dev/null +++ 
b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a11.c @@ -0,0 +1,22 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=apple-a11 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a12.c b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a12.c new file mode 100644 index 00000000000000..31b4b73004a0a8 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a12.c @@ -0,0 +1,27 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=apple-a12 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// 
CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a13.c b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a13.c new file mode 100644 index 00000000000000..198d0181a00ef0 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a13.c @@ -0,0 +1,40 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 
--print-enabled-extensions -mcpu=apple-a13 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 
Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a14.c b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a14.c new file mode 100644 index 00000000000000..c485fb9f1483e5 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a14.c @@ -0,0 +1,47 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=apple-a14 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot 
product support +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support +// 
CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a15.c b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a15.c new file mode 100644 index 00000000000000..729f8efd015648 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a15.c @@ -0,0 +1,53 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=apple-a15 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter 
virtualization extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 
extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a16.c b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a16.c new file mode 100644 index 00000000000000..7490739c6f4cf7 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a16.c @@ -0,0 +1,54 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=apple-a16 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 
Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// 
CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a17.c b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a17.c new file mode 100644 index 00000000000000..6b82e1ad68bef4 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a17.c @@ -0,0 +1,54 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=apple-a17 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation 
restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// 
CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a7.c b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a7.c new file mode 100644 index 00000000000000..72bcc86baca4b5 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a7.c @@ -0,0 +1,11 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=apple-a7 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support diff --git 
a/clang/test/Driver/print-enabled-extensions/aarch64-apple-m4.c b/clang/test/Driver/print-enabled-extensions/aarch64-apple-m4.c new file mode 100644 index 00000000000000..a6daa12b47ed20 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-apple-m4.c @@ -0,0 +1,60 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=apple-m4 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- 
or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support +// CHECK-NEXT: FEAT_SME Enable Scalable Matrix Extension (SME) +// CHECK-NEXT: FEAT_SME2 Enable Scalable Matrix Extension 2 (SME2) instructions +// CHECK-NEXT: FEAT_SME_F64F64 Enable Scalable Matrix Extension (SME) F64F64 instructions +// CHECK-NEXT: 
FEAT_SME_I16I64 Enable Scalable Matrix Extension (SME) I16I64 instructions +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-armv8-a.c b/clang/test/Driver/print-enabled-extensions/aarch64-armv8-a.c new file mode 100644 index 00000000000000..05f5abffb42a64 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-armv8-a.c @@ -0,0 +1,10 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -march=armv8-a | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-armv8-r.c b/clang/test/Driver/print-enabled-extensions/aarch64-armv8-r.c new file mode 100644 index 00000000000000..52f05fb97c6f0a --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-armv8-r.c @@ -0,0 +1,35 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -march=armv8-r | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 
target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// 
CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-armv8.1-a.c b/clang/test/Driver/print-enabled-extensions/aarch64-armv8.1-a.c new file mode 100644 index 00000000000000..b96340847fc2fb --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-armv8.1-a.c @@ -0,0 +1,16 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -march=armv8.1-a | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-armv8.2-a.c b/clang/test/Driver/print-enabled-extensions/aarch64-armv8.2-a.c new file mode 100644 index 00000000000000..a97a26dac19553 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-armv8.2-a.c @@ -0,0 +1,20 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -march=armv8.2-a | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// 
CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-armv8.3-a.c b/clang/test/Driver/print-enabled-extensions/aarch64-armv8.3-a.c new file mode 100644 index 00000000000000..ad85dbc6a1b655 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-armv8.3-a.c @@ -0,0 +1,25 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -march=armv8.3-a | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number 
support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-armv8.4-a.c b/clang/test/Driver/print-enabled-extensions/aarch64-armv8.4-a.c new file mode 100644 index 00000000000000..ef6155c8e40f79 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-armv8.4-a.c @@ -0,0 +1,36 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -march=armv8.4-a | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: 
FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-armv8.5-a.c b/clang/test/Driver/print-enabled-extensions/aarch64-armv8.5-a.c new file mode 100644 index 00000000000000..0a9daced1ebef8 --- /dev/null +++ 
b/clang/test/Driver/print-enabled-extensions/aarch64-armv8.5-a.c @@ -0,0 +1,44 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -march=armv8.5-a | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// 
CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-armv8.6-a.c b/clang/test/Driver/print-enabled-extensions/aarch64-armv8.6-a.c new file mode 100644 index 00000000000000..53385ab208910b --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-armv8.6-a.c @@ -0,0 +1,49 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -march=armv8.6-a | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: 
FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory 
system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-armv8.7-a.c b/clang/test/Driver/print-enabled-extensions/aarch64-armv8.7-a.c new file mode 100644 index 00000000000000..dfb867bbf0c1cd --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-armv8.7-a.c @@ -0,0 +1,52 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -march=armv8.7-a | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions 
+// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: 
FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-armv8.8-a.c b/clang/test/Driver/print-enabled-extensions/aarch64-armv8.8-a.c new file mode 100644 index 00000000000000..b6e726ecdf484b --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-armv8.8-a.c @@ -0,0 +1,55 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -march=armv8.8-a | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors 
Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HBC Enable Armv8.8-A Hinted Conditional Branches Extension +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable 
ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MOPS Enable Armv8.8-A memcpy and memset acceleration instructions +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NMI, FEAT_GICv3_NMI Enable Armv8.8-A Non-maskable Interrupts +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-armv8.9-a.c b/clang/test/Driver/print-enabled-extensions/aarch64-armv8.9-a.c new file mode 100644 index 00000000000000..89df53e12d4baf --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-armv8.9-a.c @@ -0,0 +1,61 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions 
-march=armv8.9-a | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CHK Enable Armv8.0-A Check Feature Status Extension +// CHECK-NEXT: FEAT_CLRBHB Enable Clear BHB instruction +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSSC Enable Common Short Sequence Compression (CSSC) instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HBC Enable Armv8.8-A Hinted Conditional Branches 
Extension +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MOPS Enable Armv8.8-A memcpy and memset acceleration instructions +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NMI, FEAT_GICv3_NMI Enable Armv8.8-A Non-maskable Interrupts +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PRFMSLC Enable SLC target for PRFM instruction +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RASv2 Enable ARMv8.9-A Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SPECRES2 Enable Speculation Restriction Instruction +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and 
Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-armv9-a.c b/clang/test/Driver/print-enabled-extensions/aarch64-armv9-a.c new file mode 100644 index 00000000000000..cd26351a0267c9 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-armv9-a.c @@ -0,0 +1,49 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -march=armv9-a | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point 
number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance 
Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-armv9.1-a.c b/clang/test/Driver/print-enabled-extensions/aarch64-armv9.1-a.c new file mode 100644 index 00000000000000..31fb009f61c28a --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-armv9.1-a.c @@ -0,0 +1,54 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -march=armv9.1-a | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FP Enable 
ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector 
Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-armv9.2-a.c b/clang/test/Driver/print-enabled-extensions/aarch64-armv9.2-a.c new file mode 100644 index 00000000000000..7f4348366db8b1 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-armv9.2-a.c @@ -0,0 +1,57 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -march=armv9.2-a | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace 
Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A 
Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-armv9.3-a.c b/clang/test/Driver/print-enabled-extensions/aarch64-armv9.3-a.c new file mode 100644 index 00000000000000..919a239b497d3e --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-armv9.3-a.c @@ -0,0 +1,60 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -march=armv9.3-a | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// 
CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HBC Enable Armv8.8-A Hinted Conditional Branches Extension +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MOPS Enable Armv8.8-A memcpy and memset acceleration instructions +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NMI, FEAT_GICv3_NMI Enable Armv8.8-A Non-maskable Interrupts 
+// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-armv9.4-a.c b/clang/test/Driver/print-enabled-extensions/aarch64-armv9.4-a.c new file mode 100644 index 00000000000000..f0c9c596614142 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-armv9.4-a.c @@ -0,0 +1,66 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -march=armv9.4-a | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions 
enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CHK Enable Armv8.0-A Check Feature Status Extension +// CHECK-NEXT: FEAT_CLRBHB Enable Clear BHB instruction +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSSC Enable Common Short Sequence Compression (CSSC) instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HBC Enable Armv8.8-A Hinted Conditional Branches Extension +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system 
register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MOPS Enable Armv8.8-A memcpy and memset acceleration instructions +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NMI, FEAT_GICv3_NMI Enable Armv8.8-A Non-maskable Interrupts +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PRFMSLC Enable SLC target for PRFM instruction +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RASv2 Enable ARMv8.9-A Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SPECRES2 Enable Speculation Restriction Instruction +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// 
CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-armv9.5-a.c b/clang/test/Driver/print-enabled-extensions/aarch64-armv9.5-a.c new file mode 100644 index 00000000000000..1550b43cae5cd5 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-armv9.5-a.c @@ -0,0 +1,69 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -march=armv9.5-a | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CHK Enable Armv8.0-A Check Feature Status Extension +// CHECK-NEXT: FEAT_CLRBHB Enable Clear BHB instruction +// CHECK-NEXT: FEAT_CPA Enable Armv9.5-A Checked Pointer Arithmetic +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSSC Enable Common Short Sequence Compression (CSSC) instructions +// 
CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FAMINMAX Enable FAMIN and FAMAX instructions +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HBC Enable Armv8.8-A Hinted Conditional Branches Extension +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_LUT Enable Lookup Table instructions +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MOPS Enable Armv8.8-A memcpy and memset 
acceleration instructions +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NMI, FEAT_GICv3_NMI Enable Armv8.8-A Non-maskable Interrupts +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PRFMSLC Enable SLC target for PRFM instruction +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RASv2 Enable ARMv8.9-A Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SPECRES2 Enable Speculation Restriction Instruction +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction diff --git 
a/clang/test/Driver/print-enabled-extensions/aarch64-carmel.c b/clang/test/Driver/print-enabled-extensions/aarch64-carmel.c new file mode 100644 index 00000000000000..8f2b1f3f42c30c --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-carmel.c @@ -0,0 +1,21 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=carmel | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a34.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a34.c new file mode 100644 index 00000000000000..6299ed620370c1 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a34.c @@ -0,0 +1,12 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 
--print-enabled-extensions -mcpu=cortex-a34 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a35.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a35.c new file mode 100644 index 00000000000000..cdf446463c6470 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a35.c @@ -0,0 +1,12 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a35 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a510.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a510.c new file mode 100644 index 00000000000000..54068338a9858e --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a510.c @@ -0,0 +1,55 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang 
--target=aarch64 --print-enabled-extensions -mcpu=cortex-a510 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE 
Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a520.c 
b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a520.c new file mode 100644 index 00000000000000..29cec5d2d73af7 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a520.c @@ -0,0 +1,61 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a520 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable 
v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) 
instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a520ae.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a520ae.c new file mode 100644 index 00000000000000..0f3b181c395c62 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a520ae.c @@ -0,0 +1,61 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a520ae | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 
Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: 
FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a53.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a53.c new file mode 100644 index 00000000000000..d978321011032e --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a53.c @@ -0,0 +1,12 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a53 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL 
Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a55.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a55.c new file mode 100644 index 00000000000000..6ddb7cedcd2dfa --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a55.c @@ -0,0 +1,24 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a55 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double 
Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a57.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a57.c new file mode 100644 index 00000000000000..ddd0492e5d33b1 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a57.c @@ -0,0 +1,12 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a57 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a65.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a65.c new file mode 100644 index 00000000000000..49025bd615d8fc --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a65.c @@ -0,0 +1,25 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a65 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 
checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a65ae.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a65ae.c new file mode 100644 index 00000000000000..632c8d1fae9e58 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a65ae.c @@ -0,0 +1,25 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a65ae | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable 
v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a710.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a710.c new file mode 100644 index 00000000000000..f073db5ea7441c --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a710.c @@ -0,0 +1,55 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a710 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable 
v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never 
extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a715.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a715.c new file mode 100644 index 00000000000000..359ca76a243fbc --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a715.c @@ -0,0 +1,56 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a715 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: 
FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system 
Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a72.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a72.c new file mode 100644 index 00000000000000..548bd395cd2f19 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a72.c @@ -0,0 +1,12 @@ +// 
REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a72 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a720.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a720.c new file mode 100644 index 00000000000000..41fa2756188b72 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a720.c @@ -0,0 +1,63 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a720 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point 
of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable 
v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a720ae.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a720ae.c new file mode 100644 index 00000000000000..b9ac21aa256f7a --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a720ae.c @@ -0,0 +1,63 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a720ae | FileCheck 
--strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR 
Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE 
Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a725.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a725.c new file mode 100644 index 00000000000000..823319f108e288 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a725.c @@ -0,0 +1,63 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a725 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace 
Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM 
Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a73.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a73.c new file mode 100644 index 00000000000000..b0beca829a94e9 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a73.c @@ -0,0 +1,12 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a73 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced 
SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a75.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a75.c new file mode 100644 index 00000000000000..df0e13653a65ee --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a75.c @@ -0,0 +1,24 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a75 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, 
FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a76.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a76.c new file mode 100644 index 00000000000000..6e2ca4a7cc18fc --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a76.c @@ -0,0 +1,25 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a76 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: 
FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a76ae.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a76ae.c new file mode 100644 index 00000000000000..a58da56fc6661f --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a76ae.c @@ -0,0 +1,25 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a76ae | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE 
Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a77.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a77.c new file mode 100644 index 00000000000000..8e81465ea3ad5b --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a77.c @@ -0,0 +1,25 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a77 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git 
a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a78.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a78.c new file mode 100644 index 00000000000000..32a21c1bde159e --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a78.c @@ -0,0 +1,26 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a78 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git 
a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a78ae.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a78ae.c new file mode 100644 index 00000000000000..309cd8715a43f0 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a78ae.c @@ -0,0 +1,26 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a78ae | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff 
--git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a78c.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a78c.c new file mode 100644 index 00000000000000..30725109fdda98 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-a78c.c @@ -0,0 +1,28 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-a78c | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable 
Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82.c new file mode 100644 index 00000000000000..63368f32da8cdd --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82.c @@ -0,0 +1,36 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-r82 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// 
CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82ae.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82ae.c new file mode 100644 index 00000000000000..e9ac390283ad4c --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82ae.c @@ -0,0 +1,36 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-r82ae | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// 
CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-x1.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-x1.c new file mode 100644 index 00000000000000..e2d42447db26a6 --- /dev/null +++ 
b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-x1.c @@ -0,0 +1,26 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-x1 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-x1c.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-x1c.c new file mode 100644 index 00000000000000..0d8b33f075c2c8 --- /dev/null +++ 
b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-x1c.c @@ -0,0 +1,30 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-x1c | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: 
FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-x2.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-x2.c new file mode 100644 index 00000000000000..9a8a4f41c1310d --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-x2.c @@ -0,0 +1,55 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-x2 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable 
alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit 
permutation SVE2 instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-x3.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-x3.c new file mode 100644 index 00000000000000..7fd1cc9de7821d --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-x3.c @@ -0,0 +1,56 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-x3 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: 
FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and 
data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-x4.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-x4.c new file mode 100644 index 00000000000000..10b13a0052c76d --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-x4.c @@ -0,0 +1,63 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-x4 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: 
FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication 
extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-x925.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-x925.c new file mode 100644 index 00000000000000..09ceb0b358b91c --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-x925.c @@ -0,0 +1,63 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=cortex-x925 | FileCheck --strict-whitespace 
--implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 
Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB 
Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-exynos-m3.c b/clang/test/Driver/print-enabled-extensions/aarch64-exynos-m3.c new file mode 100644 index 00000000000000..2945fa20a98217 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-exynos-m3.c @@ -0,0 +1,12 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=exynos-m3 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-exynos-m4.c b/clang/test/Driver/print-enabled-extensions/aarch64-exynos-m4.c new file mode 100644 index 00000000000000..2739ac4be74e3f --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-exynos-m4.c @@ -0,0 +1,23 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=exynos-m4 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) 
Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-exynos-m5.c b/clang/test/Driver/print-enabled-extensions/aarch64-exynos-m5.c new file mode 100644 index 00000000000000..4856a6b338a742 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-exynos-m5.c @@ -0,0 +1,23 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=exynos-m5 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// 
CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-falkor.c b/clang/test/Driver/print-enabled-extensions/aarch64-falkor.c new file mode 100644 index 00000000000000..ea867f2bf3f383 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-falkor.c @@ -0,0 +1,13 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=falkor | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions 
+// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-generic.c b/clang/test/Driver/print-enabled-extensions/aarch64-generic.c new file mode 100644 index 00000000000000..25856f120aa374 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-generic.c @@ -0,0 +1,10 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=generic | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-kryo.c b/clang/test/Driver/print-enabled-extensions/aarch64-kryo.c new file mode 100644 index 00000000000000..3124f85d097d92 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-kryo.c @@ -0,0 +1,12 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=kryo | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-512tvb.c 
b/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-512tvb.c new file mode 100644 index 00000000000000..64e0f2eb4f55a2 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-512tvb.c @@ -0,0 +1,48 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=neoverse-512tvb | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 
Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RNG Enable Random Number generation instructions +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support +// CHECK-NEXT: FEAT_SM4, FEAT_SM3 Enable SM3 and SM4 support +// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-e1.c b/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-e1.c new file mode 100644 index 00000000000000..cdb67fe5134699 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-e1.c @@ -0,0 +1,25 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=neoverse-e1 | 
FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-n1.c b/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-n1.c new file mode 100644 index 00000000000000..beb55733236185 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-n1.c @@ -0,0 +1,26 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=neoverse-n1 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + 
+// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-n2.c b/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-n2.c new file mode 100644 index 00000000000000..31ca041fc692fe --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-n2.c @@ -0,0 +1,55 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=neoverse-n2 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s 
+ +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 
(LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-n3.c b/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-n3.c new file mode 100644 index 00000000000000..2662adf6776918 --- /dev/null +++ 
b/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-n3.c @@ -0,0 +1,64 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=neoverse-n3 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: 
FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_RNG Enable Random Number generation instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass 
Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-v1.c b/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-v1.c new file mode 100644 index 00000000000000..467847a575b281 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-v1.c @@ -0,0 +1,48 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=neoverse-v1 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support 
+// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RNG Enable Random Number generation instructions +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support +// CHECK-NEXT: FEAT_SM4, FEAT_SM3 Enable SM3 and SM4 support +// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit 
+// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-v2.c b/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-v2.c new file mode 100644 index 00000000000000..420be38d454f55 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-v2.c @@ -0,0 +1,57 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=neoverse-v2 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: 
FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_RNG Enable Random Number generation instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPE Enable Statistical 
Profiling extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-v3.c b/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-v3.c new file mode 100644 index 00000000000000..d9a7d2f570cd2b --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-v3.c @@ -0,0 +1,66 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=neoverse-v3 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BRBE Enable Branch Record Buffer Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: 
FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LS64, FEAT_LS64_V, FEAT_LS64_ACCDATA Enable Armv8.7-A LD64B/ST64B Accelerator Extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: FEAT_NV, 
FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_RNG Enable Random Number generation instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-v3ae.c 
b/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-v3ae.c new file mode 100644 index 00000000000000..21686e5631cb77 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-neoverse-v3ae.c @@ -0,0 +1,66 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=neoverse-v3ae | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BRBE Enable Branch Record Buffer Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it 
to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LS64, FEAT_LS64_V, FEAT_LS64_ACCDATA Enable Armv8.7-A LD64B/ST64B Accelerator Extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_RNG Enable Random Number generation instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension 
+// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-oryon-1.c b/clang/test/Driver/print-enabled-extensions/aarch64-oryon-1.c new file mode 100644 index 00000000000000..7a9b838b08f3f9 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-oryon-1.c @@ -0,0 +1,56 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=oryon-1 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable v8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target 
Identification +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable v8.5 Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never 
extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RNG Enable Random Number generation instructions +// CHECK-NEXT: FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support +// CHECK-NEXT: FEAT_SM4, FEAT_SM3 Enable SM3 and SM4 support +// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-saphira.c b/clang/test/Driver/print-enabled-extensions/aarch64-saphira.c new file mode 100644 index 00000000000000..dcea3cf3cedf18 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-saphira.c @@ -0,0 +1,38 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=saphira | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable 
AES support +// CHECK-NEXT: FEAT_AMUv1 Enable v8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable v8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MPAM Enable v8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable v8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SEL2 Enable v8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support 
+// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable v8.4-A TLB Range and Maintenance Instructions +// CHECK-NEXT: FEAT_TRF Enable v8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-thunderx.c b/clang/test/Driver/print-enabled-extensions/aarch64-thunderx.c new file mode 100644 index 00000000000000..e01903fe6be2f2 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-thunderx.c @@ -0,0 +1,12 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=thunderx | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-thunderx2t99.c b/clang/test/Driver/print-enabled-extensions/aarch64-thunderx2t99.c new file mode 100644 index 00000000000000..301b8ea880836a --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-thunderx2t99.c @@ -0,0 +1,16 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=thunderx2t99 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support 
+// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-thunderx3t110.c b/clang/test/Driver/print-enabled-extensions/aarch64-thunderx3t110.c new file mode 100644 index 00000000000000..6396cc70579e9e --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-thunderx3t110.c @@ -0,0 +1,26 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=thunderx3t110 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CCIDX Enable v8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension 
(LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-thunderxt81.c b/clang/test/Driver/print-enabled-extensions/aarch64-thunderxt81.c new file mode 100644 index 00000000000000..2f10f3925d47d7 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-thunderxt81.c @@ -0,0 +1,12 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=thunderxt81 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-thunderxt83.c b/clang/test/Driver/print-enabled-extensions/aarch64-thunderxt83.c new file mode 100644 index 00000000000000..2e84250d346d43 --- /dev/null +++ 
b/clang/test/Driver/print-enabled-extensions/aarch64-thunderxt83.c @@ -0,0 +1,12 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=thunderxt83 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-thunderxt88.c b/clang/test/Driver/print-enabled-extensions/aarch64-thunderxt88.c new file mode 100644 index 00000000000000..b5fe390cb72847 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-thunderxt88.c @@ -0,0 +1,12 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=thunderxt88 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-tsv110.c b/clang/test/Driver/print-enabled-extensions/aarch64-tsv110.c new file mode 100644 index 00000000000000..8617fbad2afa13 --- /dev/null 
+++ b/clang/test/Driver/print-enabled-extensions/aarch64-tsv110.c @@ -0,0 +1,27 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=tsv110 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: FEAT_DPB Enable v8.2 data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable ARMv8 +// CHECK-NEXT: FEAT_FP16 Full FP16 +// CHECK-NEXT: FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enables ARM v8.1 Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_PAN Enables ARM v8.1 Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable v8.2 PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_UAO Enable v8.2 UAO PState +// CHECK-NEXT: FEAT_VHE Enables ARM v8.1 Virtual Host extension diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c new file mode 100644 index 
00000000000000..1af1bd0833d4f4 --- /dev/null +++ b/clang/test/Driver/print-supported-extensions-aarch64.c @@ -0,0 +1,83 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64-linux-gnu --print-supported-extensions | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: All available -march extensions for AArch64 +// CHECK-EMPTY: +// CHECK-NEXT: Name Architecture Feature(s) Description +// CHECK-NEXT: aes FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: b16b16 FEAT_SVE_B16B16 Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions +// CHECK-NEXT: bf16 FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: brbe FEAT_BRBE Enable Branch Record Buffer Extension +// CHECK-NEXT: bti FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: fcma FEAT_FCMA Enable v8.3-A Floating-point complex number support +// CHECK-NEXT: cpa FEAT_CPA Enable Armv9.5-A Checked Pointer Arithmetic +// CHECK-NEXT: crc FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions +// CHECK-NEXT: crypto FEAT_Crypto Enable cryptographic instructions +// CHECK-NEXT: cssc FEAT_CSSC Enable Common Short Sequence Compression (CSSC) instructions +// CHECK-NEXT: d128 FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, FEAT_SYSINSTR128 Enable Armv9.4-A 128-bit Page Table Descriptors, System Registers and Instructions +// CHECK-NEXT: dit FEAT_DIT Enable v8.4-A Data Independent Timing instructions +// CHECK-NEXT: dotprod FEAT_DotProd Enable dot product support +// CHECK-NEXT: f32mm FEAT_F32MM Enable Matrix Multiply FP32 Extension +// CHECK-NEXT: f64mm FEAT_F64MM Enable Matrix Multiply FP64 Extension +// CHECK-NEXT: faminmax FEAT_FAMINMAX Enable FAMIN and FAMAX instructions +// CHECK-NEXT: flagm FEAT_FlagM Enable v8.4-A Flag Manipulation Instructions +// CHECK-NEXT: fp FEAT_FP Enable ARMv8 +// CHECK-NEXT: fp16fml FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: fp8 FEAT_FP8 Enable FP8 instructions +// CHECK-NEXT: fp8dot2 FEAT_FP8DOT2 Enable fp8 2-way dot 
instructions +// CHECK-NEXT: fp8dot4 FEAT_FP8DOT4 Enable fp8 4-way dot instructions +// CHECK-NEXT: fp8fma FEAT_FP8FMA Enable fp8 multiply-add instructions +// CHECK-NEXT: fp16 FEAT_FP16 Full FP16 +// CHECK-NEXT: gcs FEAT_GCS Enable Armv9.4-A Guarded Call Stack Extension +// CHECK-NEXT: hbc FEAT_HBC Enable Armv8.8-A Hinted Conditional Branches Extension +// CHECK-NEXT: i8mm FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: ite FEAT_ITE Enable Armv9.4-A Instrumentation Extension +// CHECK-NEXT: jscvt FEAT_JSCVT Enable v8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: ls64 FEAT_LS64, FEAT_LS64_V, FEAT_LS64_ACCDATA Enable Armv8.7-A LD64B/ST64B Accelerator Extension +// CHECK-NEXT: lse FEAT_LSE Enable ARMv8.1 Large System Extension (LSE) atomic instructions +// CHECK-NEXT: lse128 FEAT_LSE128 Enable Armv9.4-A 128-bit Atomic Instructions +// CHECK-NEXT: lut FEAT_LUT Enable Lookup Table instructions +// CHECK-NEXT: mops FEAT_MOPS Enable Armv8.8-A memcpy and memset acceleration instructions +// CHECK-NEXT: memtag FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: simd FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: pauth FEAT_PAuth Enable v8.3-A Pointer Authentication extension +// CHECK-NEXT: pauth-lr FEAT_PAuth_LR Enable Armv9.5-A PAC enhancements +// CHECK-NEXT: pmuv3 FEAT_PMUv3 Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension +// CHECK-NEXT: predres FEAT_SPECRES Enable v8.5a execution and data prediction invalidation instructions +// CHECK-NEXT: rng FEAT_RNG Enable Random Number generation instructions +// CHECK-NEXT: ras FEAT_RAS, FEAT_RASv1p1 Enable ARMv8 Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: rasv2 FEAT_RASv2 Enable ARMv8.9-A Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: rcpc FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: rcpc3 FEAT_LRCPC3 Enable Armv8.9-A RCPC instructions for A64 and Advanced SIMD and floating-point 
instruction set +// CHECK-NEXT: rdm FEAT_RDM Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: sb FEAT_SB Enable v8.5 Speculation Barrier +// CHECK-NEXT: sha2 FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: sha3 FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support +// CHECK-NEXT: sm4 FEAT_SM4, FEAT_SM3 Enable SM3 and SM4 support +// CHECK-NEXT: sme FEAT_SME Enable Scalable Matrix Extension (SME) +// CHECK-NEXT: sme-f16f16 FEAT_SME_F16F16 Enable SME non-widening Float16 instructions +// CHECK-NEXT: sme-f64f64 FEAT_SME_F64F64 Enable Scalable Matrix Extension (SME) F64F64 instructions +// CHECK-NEXT: sme-f8f16 FEAT_SME_F8F16 Enable Scalable Matrix Extension (SME) F8F16 instructions +// CHECK-NEXT: sme-f8f32 FEAT_SME_F8F32 Enable Scalable Matrix Extension (SME) F8F32 instructions +// CHECK-NEXT: sme-fa64 FEAT_SME_FA64 Enable the full A64 instruction set in streaming SVE mode +// CHECK-NEXT: sme-i16i64 FEAT_SME_I16I64 Enable Scalable Matrix Extension (SME) I16I64 instructions +// CHECK-NEXT: sme-lutv2 FEAT_SME_LUTv2 Enable Scalable Matrix Extension (SME) LUTv2 instructions +// CHECK-NEXT: sme2 FEAT_SME2 Enable Scalable Matrix Extension 2 (SME2) instructions +// CHECK-NEXT: sme2p1 FEAT_SME2p1 Enable Scalable Matrix Extension 2.1 instructions +// CHECK-NEXT: profile FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: predres2 FEAT_SPECRES2 Enable Speculation Restriction Instruction +// CHECK-NEXT: ssbs FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: ssve-fp8dot2 FEAT_SSVE_FP8DOT2 Enable SVE2 fp8 2-way dot product instructions +// CHECK-NEXT: ssve-fp8dot4 FEAT_SSVE_FP8DOT4 Enable SVE2 fp8 4-way dot product instructions +// CHECK-NEXT: ssve-fp8fma FEAT_SSVE_FP8FMA Enable SVE2 fp8 multiply-add instructions +// CHECK-NEXT: sve FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: sve2 FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// 
CHECK-NEXT: sve2-aes FEAT_SVE_AES, FEAT_SVE_PMULL128 Enable AES SVE2 instructions +// CHECK-NEXT: sve2-bitperm FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions +// CHECK-NEXT: sve2-sha3 FEAT_SVE_SHA3 Enable SHA3 SVE2 instructions +// CHECK-NEXT: sve2-sm4 FEAT_SVE_SM4 Enable SM4 SVE2 instructions +// CHECK-NEXT: sve2p1 FEAT_SVE2p1 Enable Scalable Vector Extension 2.1 instructions +// CHECK-NEXT: the FEAT_THE Enable Armv8.9-A Translation Hardening Extension +// CHECK-NEXT: tlbiw FEAT_TLBIW Enable ARMv9.5-A TLBI VMALL for Dirty State +// CHECK-NEXT: tme FEAT_TME Enable Transactional Memory Extension +// CHECK-NEXT: wfxt FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction diff --git a/clang/test/Driver/print-supported-extensions-arm.c b/clang/test/Driver/print-supported-extensions-arm.c new file mode 100644 index 00000000000000..0dc2e9fc69738a --- /dev/null +++ b/clang/test/Driver/print-supported-extensions-arm.c @@ -0,0 +1,30 @@ +// REQUIRES: arm-registered-target +// RUN: %clang --target=arm-linux-gnu --print-supported-extensions | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: All available -march extensions for ARM +// CHECK-EMPTY: +// CHECK-NEXT: Name Description +// CHECK-NEXT: crc Enable support for CRC instructions +// CHECK-NEXT: crypto Enable support for Cryptography extensions +// CHECK-NEXT: sha2 Enable SHA1 and SHA256 support +// CHECK-NEXT: aes Enable AES support +// CHECK-NEXT: dotprod Enable support for dot product instructions +// CHECK-NEXT: dsp Supports DSP instructions in ARM and/or Thumb2 +// CHECK-NEXT: mve Support M-Class Vector Extension with integer ops +// CHECK-NEXT: mve.fp Support M-Class Vector Extension with integer and floating ops +// CHECK-NEXT: fp16 Enable half-precision floating point +// CHECK-NEXT: ras Enable Reliability, Availability and Serviceability extensions +// CHECK-NEXT: fp16fml Enable full half-precision floating point fml instructions +// CHECK-NEXT: bf16 Enable support for BFloat16 
instructions +// CHECK-NEXT: sb Enable v8.5a Speculation Barrier +// CHECK-NEXT: i8mm Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: lob Enable Low Overhead Branch extensions +// CHECK-NEXT: cdecp0 Coprocessor 0 ISA is CDEv1 +// CHECK-NEXT: cdecp1 Coprocessor 1 ISA is CDEv1 +// CHECK-NEXT: cdecp2 Coprocessor 2 ISA is CDEv1 +// CHECK-NEXT: cdecp3 Coprocessor 3 ISA is CDEv1 +// CHECK-NEXT: cdecp4 Coprocessor 4 ISA is CDEv1 +// CHECK-NEXT: cdecp5 Coprocessor 5 ISA is CDEv1 +// CHECK-NEXT: cdecp6 Coprocessor 6 ISA is CDEv1 +// CHECK-NEXT: cdecp7 Coprocessor 7 ISA is CDEv1 +// CHECK-NEXT: pacbti Enable Pointer Authentication and Branch Target Identification diff --git a/clang/test/Driver/print-supported-extensions-riscv.c b/clang/test/Driver/print-supported-extensions-riscv.c new file mode 100644 index 00000000000000..b58e1514bbfc2c --- /dev/null +++ b/clang/test/Driver/print-supported-extensions-riscv.c @@ -0,0 +1,197 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang --target=riscv64-linux-gnu --print-supported-extensions | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: All available -march extensions for RISC-V +// CHECK-EMPTY: +// CHECK-NEXT: Name Version Description +// CHECK-NEXT: i 2.1 'I' (Base Integer Instruction Set) +// CHECK-NEXT: e 2.0 Implements RV{32,64}E (provides 16 rather than 32 GPRs) +// CHECK-NEXT: m 2.0 'M' (Integer Multiplication and Division) +// CHECK-NEXT: a 2.1 'A' (Atomic Instructions) +// CHECK-NEXT: f 2.2 'F' (Single-Precision Floating-Point) +// CHECK-NEXT: d 2.2 'D' (Double-Precision Floating-Point) +// CHECK-NEXT: c 2.0 'C' (Compressed Instructions) +// CHECK-NEXT: b 1.0 'B' (the collection of the Zba, Zbb, Zbs extensions) +// CHECK-NEXT: v 1.0 'V' (Vector Extension for Application Processors) +// CHECK-NEXT: h 1.0 'H' (Hypervisor) +// CHECK-NEXT: zic64b 1.0 'Zic64b' (Cache Block Size Is 64 Bytes) +// CHECK-NEXT: zicbom 1.0 'Zicbom' (Cache-Block Management Instructions) +// CHECK-NEXT: zicbop 1.0 
'Zicbop' (Cache-Block Prefetch Instructions) +// CHECK-NEXT: zicboz 1.0 'Zicboz' (Cache-Block Zero Instructions) +// CHECK-NEXT: ziccamoa 1.0 'Ziccamoa' (Main Memory Supports All Atomics in A) +// CHECK-NEXT: ziccif 1.0 'Ziccif' (Main Memory Supports Instruction Fetch with Atomicity Requirement) +// CHECK-NEXT: zicclsm 1.0 'Zicclsm' (Main Memory Supports Misaligned Loads/Stores) +// CHECK-NEXT: ziccrse 1.0 'Ziccrse' (Main Memory Supports Forward Progress on LR/SC Sequences) +// CHECK-NEXT: zicntr 2.0 'Zicntr' (Base Counters and Timers) +// CHECK-NEXT: zicond 1.0 'Zicond' (Integer Conditional Operations) +// CHECK-NEXT: zicsr 2.0 'zicsr' (CSRs) +// CHECK-NEXT: zifencei 2.0 'Zifencei' (fence.i) +// CHECK-NEXT: zihintntl 1.0 'Zihintntl' (Non-Temporal Locality Hints) +// CHECK-NEXT: zihintpause 2.0 'Zihintpause' (Pause Hint) +// CHECK-NEXT: zihpm 2.0 'Zihpm' (Hardware Performance Counters) +// CHECK-NEXT: zimop 1.0 'Zimop' (May-Be-Operations) +// CHECK-NEXT: zmmul 1.0 'Zmmul' (Integer Multiplication) +// CHECK-NEXT: za128rs 1.0 'Za128rs' (Reservation Set Size of at Most 128 Bytes) +// CHECK-NEXT: za64rs 1.0 'Za64rs' (Reservation Set Size of at Most 64 Bytes) +// CHECK-NEXT: zaamo 1.0 'Zaamo' (Atomic Memory Operations) +// CHECK-NEXT: zabha 1.0 'Zabha' (Byte and Halfword Atomic Memory Operations) +// CHECK-NEXT: zacas 1.0 'Zacas' (Atomic Compare-And-Swap Instructions) +// CHECK-NEXT: zalrsc 1.0 'Zalrsc' (Load-Reserved/Store-Conditional) +// CHECK-NEXT: zama16b 1.0 'Zama16b' (Atomic 16-byte misaligned loads, stores and AMOs) +// CHECK-NEXT: zawrs 1.0 'Zawrs' (Wait on Reservation Set) +// CHECK-NEXT: zfa 1.0 'Zfa' (Additional Floating-Point) +// CHECK-NEXT: zfh 1.0 'Zfh' (Half-Precision Floating-Point) +// CHECK-NEXT: zfhmin 1.0 'Zfhmin' (Half-Precision Floating-Point Minimal) +// CHECK-NEXT: zfinx 1.0 'Zfinx' (Float in Integer) +// CHECK-NEXT: zdinx 1.0 'Zdinx' (Double in Integer) +// CHECK-NEXT: zca 1.0 'Zca' (part of the C extension, excluding compressed floating point 
loads/stores) +// CHECK-NEXT: zcb 1.0 'Zcb' (Compressed basic bit manipulation instructions) +// CHECK-NEXT: zcd 1.0 'Zcd' (Compressed Double-Precision Floating-Point Instructions) +// CHECK-NEXT: zce 1.0 'Zce' (Compressed extensions for microcontrollers) +// CHECK-NEXT: zcf 1.0 'Zcf' (Compressed Single-Precision Floating-Point Instructions) +// CHECK-NEXT: zcmop 1.0 'Zcmop' (Compressed May-Be-Operations) +// CHECK-NEXT: zcmp 1.0 'Zcmp' (sequenced instuctions for code-size reduction) +// CHECK-NEXT: zcmt 1.0 'Zcmt' (table jump instuctions for code-size reduction) +// CHECK-NEXT: zba 1.0 'Zba' (Address Generation Instructions) +// CHECK-NEXT: zbb 1.0 'Zbb' (Basic Bit-Manipulation) +// CHECK-NEXT: zbc 1.0 'Zbc' (Carry-Less Multiplication) +// CHECK-NEXT: zbkb 1.0 'Zbkb' (Bitmanip instructions for Cryptography) +// CHECK-NEXT: zbkc 1.0 'Zbkc' (Carry-less multiply instructions for Cryptography) +// CHECK-NEXT: zbkx 1.0 'Zbkx' (Crossbar permutation instructions) +// CHECK-NEXT: zbs 1.0 'Zbs' (Single-Bit Instructions) +// CHECK-NEXT: zk 1.0 'Zk' (Standard scalar cryptography extension) +// CHECK-NEXT: zkn 1.0 'Zkn' (NIST Algorithm Suite) +// CHECK-NEXT: zknd 1.0 'Zknd' (NIST Suite: AES Decryption) +// CHECK-NEXT: zkne 1.0 'Zkne' (NIST Suite: AES Encryption) +// CHECK-NEXT: zknh 1.0 'Zknh' (NIST Suite: Hash Function Instructions) +// CHECK-NEXT: zkr 1.0 'Zkr' (Entropy Source Extension) +// CHECK-NEXT: zks 1.0 'Zks' (ShangMi Algorithm Suite) +// CHECK-NEXT: zksed 1.0 'Zksed' (ShangMi Suite: SM4 Block Cipher Instructions) +// CHECK-NEXT: zksh 1.0 'Zksh' (ShangMi Suite: SM3 Hash Function Instructions) +// CHECK-NEXT: zkt 1.0 'Zkt' (Data Independent Execution Latency) +// CHECK-NEXT: zvbb 1.0 'Zvbb' (Vector basic bit-manipulation instructions) +// CHECK-NEXT: zvbc 1.0 'Zvbc' (Vector Carryless Multiplication) +// CHECK-NEXT: zve32f 1.0 'Zve32f' (Vector Extensions for Embedded Processors with maximal 32 EEW and F extension) +// CHECK-NEXT: zve32x 1.0 'Zve32x' (Vector Extensions 
for Embedded Processors with maximal 32 EEW) +// CHECK-NEXT: zve64d 1.0 'Zve64d' (Vector Extensions for Embedded Processors with maximal 64 EEW, F and D extension) +// CHECK-NEXT: zve64f 1.0 'Zve64f' (Vector Extensions for Embedded Processors with maximal 64 EEW and F extension) +// CHECK-NEXT: zve64x 1.0 'Zve64x' (Vector Extensions for Embedded Processors with maximal 64 EEW) +// CHECK-NEXT: zvfh 1.0 'Zvfh' (Vector Half-Precision Floating-Point) +// CHECK-NEXT: zvfhmin 1.0 'Zvfhmin' (Vector Half-Precision Floating-Point Minimal) +// CHECK-NEXT: zvkb 1.0 'Zvkb' (Vector Bit-manipulation used in Cryptography) +// CHECK-NEXT: zvkg 1.0 'Zvkg' (Vector GCM instructions for Cryptography) +// CHECK-NEXT: zvkn 1.0 'Zvkn' (shorthand for 'Zvkned', 'Zvknhb', 'Zvkb', and 'Zvkt') +// CHECK-NEXT: zvknc 1.0 'Zvknc' (shorthand for 'Zvknc' and 'Zvbc') +// CHECK-NEXT: zvkned 1.0 'Zvkned' (Vector AES Encryption & Decryption (Single Round)) +// CHECK-NEXT: zvkng 1.0 'zvkng' (shorthand for 'Zvkn' and 'Zvkg') +// CHECK-NEXT: zvknha 1.0 'Zvknha' (Vector SHA-2 (SHA-256 only)) +// CHECK-NEXT: zvknhb 1.0 'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512)) +// CHECK-NEXT: zvks 1.0 'Zvks' (shorthand for 'Zvksed', 'Zvksh', 'Zvkb', and 'Zvkt') +// CHECK-NEXT: zvksc 1.0 'Zvksc' (shorthand for 'Zvks' and 'Zvbc') +// CHECK-NEXT: zvksed 1.0 'Zvksed' (SM4 Block Cipher Instructions) +// CHECK-NEXT: zvksg 1.0 'Zvksg' (shorthand for 'Zvks' and 'Zvkg') +// CHECK-NEXT: zvksh 1.0 'Zvksh' (SM3 Hash Function Instructions) +// CHECK-NEXT: zvkt 1.0 'Zvkt' (Vector Data-Independent Execution Latency) +// CHECK-NEXT: zvl1024b 1.0 'Zvl' (Minimum Vector Length) 1024 +// CHECK-NEXT: zvl128b 1.0 'Zvl' (Minimum Vector Length) 128 +// CHECK-NEXT: zvl16384b 1.0 'Zvl' (Minimum Vector Length) 16384 +// CHECK-NEXT: zvl2048b 1.0 'Zvl' (Minimum Vector Length) 2048 +// CHECK-NEXT: zvl256b 1.0 'Zvl' (Minimum Vector Length) 256 +// CHECK-NEXT: zvl32768b 1.0 'Zvl' (Minimum Vector Length) 32768 +// CHECK-NEXT: zvl32b 1.0 'Zvl' (Minimum 
Vector Length) 32 +// CHECK-NEXT: zvl4096b 1.0 'Zvl' (Minimum Vector Length) 4096 +// CHECK-NEXT: zvl512b 1.0 'Zvl' (Minimum Vector Length) 512 +// CHECK-NEXT: zvl64b 1.0 'Zvl' (Minimum Vector Length) 64 +// CHECK-NEXT: zvl65536b 1.0 'Zvl' (Minimum Vector Length) 65536 +// CHECK-NEXT: zvl8192b 1.0 'Zvl' (Minimum Vector Length) 8192 +// CHECK-NEXT: zhinx 1.0 'Zhinx' (Half Float in Integer) +// CHECK-NEXT: zhinxmin 1.0 'Zhinxmin' (Half Float in Integer Minimal) +// CHECK-NEXT: shcounterenw 1.0 'Shcounterenw' (Support writeable hcounteren enable bit for any hpmcounter that is not read-only zero) +// CHECK-NEXT: shgatpa 1.0 'Sgatpa' (SvNNx4 mode supported for all modes supported by satp, as well as Bare) +// CHECK-NEXT: shtvala 1.0 'Shtvala' (htval provides all needed values) +// CHECK-NEXT: shvsatpa 1.0 'Svsatpa' (vsatp supports all modes supported by satp) +// CHECK-NEXT: shvstvala 1.0 'Shvstvala' (vstval provides all needed values) +// CHECK-NEXT: shvstvecd 1.0 'Shvstvecd' (vstvec supports Direct mode) +// CHECK-NEXT: smaia 1.0 'Smaia' (Advanced Interrupt Architecture Machine Level) +// CHECK-NEXT: smcdeleg 1.0 'Smcdeleg' (Counter Delegation Machine Level) +// CHECK-NEXT: smcsrind 1.0 'Smcsrind' (Indirect CSR Access Machine Level) +// CHECK-NEXT: smepmp 1.0 'Smepmp' (Enhanced Physical Memory Protection) +// CHECK-NEXT: smstateen 1.0 'Smstateen' (Machine-mode view of the state-enable extension) +// CHECK-NEXT: ssaia 1.0 'Ssaia' (Advanced Interrupt Architecture Supervisor Level) +// CHECK-NEXT: ssccfg 1.0 'Ssccfg' (Counter Configuration Supervisor Level) +// CHECK-NEXT: ssccptr 1.0 'Ssccptr' (Main memory supports page table reads) +// CHECK-NEXT: sscofpmf 1.0 'Sscofpmf' (Count Overflow and Mode-Based Filtering) +// CHECK-NEXT: sscounterenw 1.0 'Sscounterenw' (Support writeable scounteren enable bit for any hpmcounter that is not read-only zero) +// CHECK-NEXT: sscsrind 1.0 'Sscsrind' (Indirect CSR Access Supervisor Level) +// CHECK-NEXT: ssstateen 1.0 'Ssstateen' 
(Supervisor-mode view of the state-enable extension) +// CHECK-NEXT: ssstrict 1.0 'Ssstrict' (No non-conforming extensions are present) +// CHECK-NEXT: sstc 1.0 'Sstc' (Supervisor-mode timer interrupts) +// CHECK-NEXT: sstvala 1.0 'Sstvala' (stval provides all needed values) +// CHECK-NEXT: sstvecd 1.0 'Sstvecd' (stvec supports Direct mode) +// CHECK-NEXT: ssu64xl 1.0 'Ssu64xl' (UXLEN=64 supported) +// CHECK-NEXT: svade 1.0 'Svade' (Raise exceptions on improper A/D bits) +// CHECK-NEXT: svadu 1.0 'Svadu' (Hardware A/D updates) +// CHECK-NEXT: svbare 1.0 'Svbare' $(satp mode Bare supported) +// CHECK-NEXT: svinval 1.0 'Svinval' (Fine-Grained Address-Translation Cache Invalidation) +// CHECK-NEXT: svnapot 1.0 'Svnapot' (NAPOT Translation Contiguity) +// CHECK-NEXT: svpbmt 1.0 'Svpbmt' (Page-Based Memory Types) +// CHECK-NEXT: xcvalu 1.0 'XCValu' (CORE-V ALU Operations) +// CHECK-NEXT: xcvbi 1.0 'XCVbi' (CORE-V Immediate Branching) +// CHECK-NEXT: xcvbitmanip 1.0 'XCVbitmanip' (CORE-V Bit Manipulation) +// CHECK-NEXT: xcvelw 1.0 'XCVelw' (CORE-V Event Load Word) +// CHECK-NEXT: xcvmac 1.0 'XCVmac' (CORE-V Multiply-Accumulate) +// CHECK-NEXT: xcvmem 1.0 'XCVmem' (CORE-V Post-incrementing Load & Store) +// CHECK-NEXT: xcvsimd 1.0 'XCVsimd' (CORE-V SIMD ALU) +// CHECK-NEXT: xsfcease 1.0 'XSfcease' (SiFive sf.cease Instruction) +// CHECK-NEXT: xsfvcp 1.0 'XSfvcp' (SiFive Custom Vector Coprocessor Interface Instructions) +// CHECK-NEXT: xsfvfnrclipxfqf 1.0 'XSfvfnrclipxfqf' (SiFive FP32-to-int8 Ranged Clip Instructions) +// CHECK-NEXT: xsfvfwmaccqqq 1.0 'XSfvfwmaccqqq' (SiFive Matrix Multiply Accumulate Instruction and 4-by-4)) +// CHECK-NEXT: xsfvqmaccdod 1.0 'XSfvqmaccdod' (SiFive Int8 Matrix Multiplication Instructions (2-by-8 and 8-by-2)) +// CHECK-NEXT: xsfvqmaccqoq 1.0 'XSfvqmaccqoq' (SiFive Int8 Matrix Multiplication Instructions (4-by-8 and 8-by-4)) +// CHECK-NEXT: xsifivecdiscarddlone 1.0 'XSiFivecdiscarddlone' (SiFive sf.cdiscard.d.l1 Instruction) +// CHECK-NEXT: 
xsifivecflushdlone 1.0 'XSiFivecflushdlone' (SiFive sf.cflush.d.l1 Instruction) +// CHECK-NEXT: xtheadba 1.0 'xtheadba' (T-Head address calculation instructions) +// CHECK-NEXT: xtheadbb 1.0 'xtheadbb' (T-Head basic bit-manipulation instructions) +// CHECK-NEXT: xtheadbs 1.0 'xtheadbs' (T-Head single-bit instructions) +// CHECK-NEXT: xtheadcmo 1.0 'xtheadcmo' (T-Head cache management instructions) +// CHECK-NEXT: xtheadcondmov 1.0 'xtheadcondmov' (T-Head conditional move instructions) +// CHECK-NEXT: xtheadfmemidx 1.0 'xtheadfmemidx' (T-Head FP Indexed Memory Operations) +// CHECK-NEXT: xtheadmac 1.0 'xtheadmac' (T-Head Multiply-Accumulate Instructions) +// CHECK-NEXT: xtheadmemidx 1.0 'xtheadmemidx' (T-Head Indexed Memory Operations) +// CHECK-NEXT: xtheadmempair 1.0 'xtheadmempair' (T-Head two-GPR Memory Operations) +// CHECK-NEXT: xtheadsync 1.0 'xtheadsync' (T-Head multicore synchronization instructions) +// CHECK-NEXT: xtheadvdot 1.0 'xtheadvdot' (T-Head Vector Extensions for Dot) +// CHECK-NEXT: xventanacondops 1.0 'XVentanaCondOps' (Ventana Conditional Ops) +// CHECK-EMPTY: +// CHECK-NEXT: Experimental extensions +// CHECK-NEXT: zicfilp 0.4 'Zicfilp' (Landing pad) +// CHECK-NEXT: zicfiss 0.4 'Zicfiss' (Shadow stack) +// CHECK-NEXT: zalasr 0.1 'Zalasr' (Load-Acquire and Store-Release Instructions) +// CHECK-NEXT: zfbfmin 1.0 'Zfbfmin' (Scalar BF16 Converts) +// CHECK-NEXT: ztso 0.1 'Ztso' (Memory Model - Total Store Order) +// CHECK-NEXT: zvfbfmin 1.0 'Zvbfmin' (Vector BF16 Converts) +// CHECK-NEXT: zvfbfwma 1.0 'Zvfbfwma' (Vector BF16 widening mul-add) +// CHECK-NEXT: smmpm 1.0 'Smmpm' (Machine-level Pointer Masking for M-mode) +// CHECK-NEXT: smnpm 1.0 'Smnpm' (Machine-level Pointer Masking for next lower privilege mode) +// CHECK-NEXT: ssnpm 1.0 'Ssnpm' (Supervisor-level Pointer Masking for next lower privilege mode) +// CHECK-NEXT: sspm 1.0 'Sspm' (Indicates Supervisor-mode Pointer Masking) +// CHECK-NEXT: ssqosid 1.0 'Ssqosid' (Quality-of-Service (QoS) 
Identifiers) +// CHECK-NEXT: supm 1.0 'Supm' (Indicates User-mode Pointer Masking) +// CHECK-EMPTY: +// CHECK-NEXT: Supported Profiles +// CHECK-NEXT: rva20s64 +// CHECK-NEXT: rva20u64 +// CHECK-NEXT: rva22s64 +// CHECK-NEXT: rva22u64 +// CHECK-NEXT: rvi20u32 +// CHECK-NEXT: rvi20u64 +// CHECK-EMPTY: +// CHECK-NEXT: Experimental Profiles +// CHECK-NEXT: rva23s64 +// CHECK-NEXT: rva23u64 +// CHECK-NEXT: rvb23s64 +// CHECK-NEXT: rvb23u64 +// CHECK-NEXT: rvm23u32 +// CHECK-EMPTY: +// CHECK-NEXT: Use -march to specify the target's extension. +// CHECK-NEXT: For example, clang -march=rv32i_v1p0 diff --git a/clang/test/Driver/print-supported-extensions.c b/clang/test/Driver/print-supported-extensions.c index b9b16352f8295b..9557802862733b 100644 --- a/clang/test/Driver/print-supported-extensions.c +++ b/clang/test/Driver/print-supported-extensions.c @@ -1,24 +1,5 @@ -// Test that --print-supported-extensions lists supported -march extensions -// on supported architectures, and errors on unsupported architectures. - -// RUN: %if aarch64-registered-target %{ %clang --target=aarch64-linux-gnu \ -// RUN: --print-supported-extensions 2>&1 | FileCheck %s --check-prefix AARCH64 %} -// AARCH64: All available -march extensions for AArch64 -// AARCH64: Name Architecture Feature(s) Description -// AARCH64: aes FEAT_AES, FEAT_PMULL Enable AES support - -// RUN: %if riscv-registered-target %{ %clang --target=riscv64-linux-gnu \ -// RUN: --print-supported-extensions 2>&1 | FileCheck %s --check-prefix RISCV %} -// RISCV: All available -march extensions for RISC-V -// RISCV: Name Version Description -// RISCV: i 2.1 - -// RUN: %if arm-registered-target %{ %clang --target=arm-linux-gnu \ -// RUN: --print-supported-extensions 2>&1 | FileCheck %s --check-prefix ARM %} -// ARM: All available -march extensions for ARM -// ARM: Name Description -// ARM: crc Enable support for CRC instructions +// Test that --print-supported-extensions errors on unsupported architectures. 
// RUN: %if x86-registered-target %{ not %clang --target=x86_64-linux-gnu \ // RUN: --print-supported-extensions 2>&1 | FileCheck %s --check-prefix X86 %} -// X86: error: option '--print-supported-extensions' cannot be specified on this target \ No newline at end of file +// X86: error: option '--print-supported-extensions' cannot be specified on this target diff --git a/clang/test/Headers/float.c b/clang/test/Headers/float.c index b9e6e971545e56..218ab58ba62ef6 100644 --- a/clang/test/Headers/float.c +++ b/clang/test/Headers/float.c @@ -2,11 +2,14 @@ // RUN: %clang_cc1 -fsyntax-only -verify -std=c99 -ffreestanding %s // RUN: %clang_cc1 -fsyntax-only -verify -std=c11 -ffreestanding %s // RUN: %clang_cc1 -fsyntax-only -verify -std=c23 -ffreestanding %s -// RUN: %clang_cc1 -fsyntax-only -verify -std=c23 -ffreestanding -ffinite-math-only %s +// RUN: %clang_cc1 -fsyntax-only -verify=finite -std=c23 -ffreestanding -ffinite-math-only %s // RUN: %clang_cc1 -fsyntax-only -verify -xc++ -std=c++11 -ffreestanding %s // RUN: %clang_cc1 -fsyntax-only -verify -xc++ -std=c++14 -ffreestanding %s // RUN: %clang_cc1 -fsyntax-only -verify -xc++ -std=c++17 -ffreestanding %s // RUN: %clang_cc1 -fsyntax-only -verify -xc++ -std=c++23 -ffreestanding %s +// NOTE: C++23 wasn't based on top of C23, so it gets no diagnostics for +// finite-math-only mode as happens in C. When C++ rebased onto C23, that +// is when we'll issue diagnostics for INFINITY and NAN use. // RUN: %clang_cc1 -fsyntax-only -verify -xc++ -std=c++23 -ffreestanding -ffinite-math-only %s // expected-no-diagnostics @@ -218,6 +221,10 @@ #ifndef NAN #error "Mandatory macro NAN is missing." #endif + // FIXME: the NAN diagnostic should only be issued once, not twice. 
+ _Static_assert(_Generic(INFINITY, float : 1, default : 0), ""); // finite-warning {{use of infinity via a macro is undefined behavior due to the currently enabled floating-point options}} + _Static_assert(_Generic(NAN, float : 1, default : 0), ""); // finite-warning {{use of NaN is undefined behavior due to the currently enabled floating-point options}} \ + finite-warning {{use of NaN via a macro is undefined behavior due to the currently enabled floating-point options}} #else #ifdef INFINITY #error "Macro INFINITY should not be defined." diff --git a/clang/test/Index/USR/func-template.cpp b/clang/test/Index/USR/func-template.cpp new file mode 100644 index 00000000000000..c9c82f5e30a751 --- /dev/null +++ b/clang/test/Index/USR/func-template.cpp @@ -0,0 +1,15 @@ +// RUN: c-index-test core -print-source-symbols -- %s | FileCheck %s + +template +struct A { + void f(int); + // CHECK: {{[0-9]+}}:8 | instance-method/C++ | f | c:@ST>1#T@A@F@f#I# | + + template + void f(U); + // CHECK: {{[0-9]+}}:8 | instance-method/C++ | f | c:@ST>1#T@A@FT@>1#Tf#t1.0#v# | + + template<> + void f(int); + // CHECK: {{[0-9]+}}:8 | instance-method/C++ | f | c:@ST>1#T@A@F@f<#I>#I# | +}; diff --git a/clang/test/Modules/use-after-free-2.c b/clang/test/Modules/use-after-free-2.c new file mode 100644 index 00000000000000..0c89c759bcb75c --- /dev/null +++ b/clang/test/Modules/use-after-free-2.c @@ -0,0 +1,180 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t + +//--- A.modulemap +module A { + header "A.h" + + textual header "A00.h" + textual header "A01.h" + textual header "A02.h" + textual header "A03.h" + textual header "A04.h" + textual header "A05.h" + textual header "A06.h" + textual header "A07.h" + textual header "A08.h" + textual header "A09.h" + + textual header "A10.h" + textual header "A11.h" + textual header "A12.h" + textual header "A13.h" + textual header "A14.h" + textual header "A15.h" + textual header "A16.h" + textual header "A17.h" + textual header "A18.h" + textual header 
"A19.h" + + textual header "A20.h" + textual header "A21.h" + textual header "A22.h" + textual header "A23.h" + textual header "A24.h" + textual header "A25.h" + textual header "A26.h" + textual header "A27.h" + textual header "A28.h" + textual header "A29.h" + + textual header "A30.h" + textual header "A31.h" + textual header "A32.h" + textual header "A33.h" + textual header "A34.h" + textual header "A35.h" + textual header "A36.h" + textual header "A37.h" + textual header "A38.h" + textual header "A39.h" + + textual header "A40.h" + textual header "A41.h" + textual header "A42.h" + textual header "A43.h" + textual header "A44.h" + textual header "A45.h" +} +//--- A.h + +//--- A00.h +//--- A01.h +//--- A02.h +//--- A03.h +//--- A04.h +//--- A05.h +//--- A06.h +//--- A07.h +//--- A08.h +//--- A09.h + +//--- A10.h +//--- A11.h +//--- A12.h +//--- A13.h +//--- A14.h +//--- A15.h +//--- A16.h +//--- A17.h +//--- A18.h +//--- A19.h + +//--- A20.h +//--- A21.h +//--- A22.h +//--- A23.h +//--- A24.h +//--- A25.h +//--- A26.h +//--- A27.h +//--- A28.h +//--- A29.h + +//--- A30.h +//--- A31.h +//--- A32.h +//--- A33.h +//--- A34.h +//--- A35.h +//--- A36.h +//--- A37.h +//--- A38.h +//--- A39.h + +//--- A40.h +//--- A41.h +//--- A42.h +//--- A43.h +//--- A44.h +//--- A45.h + +//--- B.modulemap +module B { header "B.h" } +//--- B.h +#include "A.h" + +//--- C.modulemap +module C { header "C.h" } +//--- C.h +#include "A00.h" +#include "A01.h" +#include "A02.h" +#include "A03.h" +#include "A04.h" +#include "A05.h" +#include "A06.h" +#include "A07.h" +#include "A08.h" +#include "A09.h" + +#include "A10.h" +#include "A11.h" +#include "A12.h" +#include "A13.h" +#include "A14.h" +#include "A15.h" +#include "A16.h" +#include "A17.h" +#include "A18.h" +#include "A19.h" + +#include "A20.h" +#include "A21.h" +#include "A22.h" +#include "A23.h" +#include "A24.h" +#include "A25.h" +#include "A26.h" +#include "A27.h" +#include "A28.h" +#include "A29.h" + +#include "A30.h" +#include 
"A31.h" +#include "A32.h" +#include "A33.h" +#include "A34.h" +#include "A35.h" +#include "A36.h" +#include "A37.h" +#include "A38.h" +#include "A39.h" + +#include "A40.h" +#include "A41.h" +#include "A42.h" +#include "A43.h" +#include "A44.h" +#include "A45.h" + +#include "B.h" + +// RUN: %clang_cc1 -fmodules -fno-modules-prune-non-affecting-module-map-files \ +// RUN: -emit-module %t/A.modulemap -fmodule-name=A -o %t/A.pcm +// RUN: %clang_cc1 -fmodules -fno-modules-prune-non-affecting-module-map-files \ +// RUN: -emit-module %t/B.modulemap -fmodule-name=B -o %t/B.pcm \ +// RUN: -fmodule-file=A=%t/A.pcm -fmodule-map-file=%t/A.modulemap +// RUN: %clang_cc1 -fmodules -fno-modules-prune-non-affecting-module-map-files \ +// RUN: -emit-module %t/C.modulemap -fmodule-name=C -o %t/C.pcm \ +// RUN: -fmodule-file=B=%t/B.pcm -fmodule-map-file=%t/B.modulemap diff --git a/clang/test/OpenMP/requires_default_atomic_mem_order.cpp b/clang/test/OpenMP/requires_default_atomic_mem_order.cpp new file mode 100644 index 00000000000000..90d2db4eac20c4 --- /dev/null +++ b/clang/test/OpenMP/requires_default_atomic_mem_order.cpp @@ -0,0 +1,46 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -emit-llvm -fopenmp -triple=x86_64-unknown-linux-gnu \ +// RUN: -DORDERING=seq_cst -o - %s \ +// RUN: | FileCheck %s --check-prefix=SEQ_CST +// RUN: %clang_cc1 -emit-llvm -fopenmp -triple=x86_64-unknown-linux-gnu \ +// RUN: -DORDERING=acq_rel -o - %s \ +// RUN: | FileCheck %s --check-prefix=ACQ_REL +// RUN: %clang_cc1 -emit-llvm -fopenmp -triple=x86_64-unknown-linux-gnu \ +// RUN: -DORDERING=relaxed -o - %s \ +// RUN: | FileCheck %s --check-prefix=RELAXED + +#pragma omp requires atomic_default_mem_order(ORDERING) + +// SEQ_CST-LABEL: define dso_local void @_Z3fooPi( +// SEQ_CST-SAME: ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { +// SEQ_CST-NEXT: [[ENTRY:.*:]] +// SEQ_CST-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// SEQ_CST-NEXT: 
store ptr [[X]], ptr [[X_ADDR]], align 8 +// SEQ_CST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// SEQ_CST-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[TMP0]], i32 1 seq_cst, align 4 +// SEQ_CST-NEXT: call void @__kmpc_flush(ptr @[[GLOB1:[0-9]+]]) +// SEQ_CST-NEXT: ret void +// +// ACQ_REL-LABEL: define dso_local void @_Z3fooPi( +// ACQ_REL-SAME: ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { +// ACQ_REL-NEXT: [[ENTRY:.*:]] +// ACQ_REL-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// ACQ_REL-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 +// ACQ_REL-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// ACQ_REL-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[TMP0]], i32 1 release, align 4 +// ACQ_REL-NEXT: call void @__kmpc_flush(ptr @[[GLOB1:[0-9]+]]) +// ACQ_REL-NEXT: ret void +// +// RELAXED-LABEL: define dso_local void @_Z3fooPi( +// RELAXED-SAME: ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { +// RELAXED-NEXT: [[ENTRY:.*:]] +// RELAXED-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// RELAXED-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 +// RELAXED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// RELAXED-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[TMP0]], i32 1 monotonic, align 4 +// RELAXED-NEXT: ret void +// +void foo(int *x) { + #pragma omp atomic update + *x = *x + 1; +} diff --git a/clang/test/Sema/builtins-microsoft-arm64.c b/clang/test/Sema/builtins-microsoft-arm64.c index 6d0dc09c9ed83f..322cf7542f43a6 100644 --- a/clang/test/Sema/builtins-microsoft-arm64.c +++ b/clang/test/Sema/builtins-microsoft-arm64.c @@ -9,6 +9,11 @@ void check__break(int x) { __break(x); // expected-error {{argument to '__break' must be a constant integer}} } +void check__hlt() { + __hlt(-1); // expected-error-re {{argument value {{.*}} is outside the valid range}} + __hlt(65536); // expected-error-re {{argument value {{.*}} is outside the valid range}} +} + void check__getReg(void) { __getReg(-1); // expected-error-re {{argument value {{.*}} is outside the valid range}} 
__getReg(32); // expected-error-re {{argument value {{.*}} is outside the valid range}} diff --git a/clang/test/SemaTemplate/generic-lambda.cpp b/clang/test/SemaTemplate/generic-lambda.cpp index fb5fa09ebcc1fd..804eeaa29d6a1d 100644 --- a/clang/test/SemaTemplate/generic-lambda.cpp +++ b/clang/test/SemaTemplate/generic-lambda.cpp @@ -60,3 +60,26 @@ template C1>> auto t3() { template C1>> auto t3(); static_assert(is_same()), X>>>); #endif + +namespace GH95735 { + +int g(int fn) { + return [f = fn](auto tpl) noexcept(noexcept(f)) { return f; }(0); +} + +int foo(auto... fn) { + // FIXME: This one hits the assertion "if the exception specification is dependent, + // then the noexcept expression should be value-dependent" in the constructor of + // FunctionProtoType. + // One possible solution is to update Sema::canThrow() to consider expressions + // (e.g. DeclRefExpr/FunctionParmPackExpr) involving unexpanded parameters as Dependent. + // This would effectively add an extra value-dependent flag to the noexcept expression. + // However, I'm afraid that would also cause ABI breakage. + // [...f = fn](auto tpl) noexcept(noexcept(f)) { return 0; }(0); + [...f = fn](auto tpl) noexcept(noexcept(g(fn...))) { return 0; }(0); + return [...f = fn](auto tpl) noexcept(noexcept(g(f...))) { return 0; }(0); +} + +int v = foo(42); + +} // namespace GH95735 diff --git a/clang/www/c_status.html b/clang/www/c_status.html index 3fb1efc1989e82..890ac1cd922863 100644 --- a/clang/www/c_status.html +++ b/clang/www/c_status.html @@ -1260,7 +1260,7 @@

C2y implementation status

Accessing byte arrays
N3254 - Unknown + Yes Slay some earthly demons I @@ -1292,7 +1292,7 @@

C2y implementation status

Remove imaginary types N3274 - Unknown + Yes diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index 0c013e6d7cb58d..27e2213e54caa8 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -213,7 +213,7 @@

C++2c implementation status

Deleting a Pointer to an Incomplete Type Should be Ill-formed P3144R2 - Clang 19 + Clang 19 Ordering of constraints involving fold expressions diff --git a/compiler-rt/include/orc_rt/c_api.h b/compiler-rt/include/orc_rt/c_api.h index 628c5cd10676a5..5585cd60868469 100644 --- a/compiler-rt/include/orc_rt/c_api.h +++ b/compiler-rt/include/orc_rt/c_api.h @@ -72,9 +72,6 @@ typedef struct { size_t Size; } orc_rt_CWrapperFunctionResult; -typedef struct orc_rt_CSharedOpaqueJITProcessControl - *orc_rt_SharedJITProcessControlRef; - /** * Zero-initialize an orc_rt_CWrapperFunctionResult. */ diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c index 7e8acb3e73eda9..ab2b685e67ef8e 100644 --- a/compiler-rt/lib/builtins/cpu_model/x86.c +++ b/compiler-rt/lib/builtins/cpu_model/x86.c @@ -367,13 +367,13 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family, } } +#define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0 + static const char *getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, const unsigned *Features, unsigned *Type, unsigned *Subtype) { -#define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0 - // We select CPU strings to match the code in Host.cpp, but we don't use them // in compiler-rt. const char *CPU = 0; @@ -662,14 +662,48 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family, const unsigned *Features, unsigned *Type, unsigned *Subtype) { - // We select CPU strings to match the code in Host.cpp, but we don't use them - // in compiler-rt. 
const char *CPU = 0; switch (Family) { + case 4: + CPU = "i486"; + break; + case 5: + CPU = "pentium"; + switch (Model) { + case 6: + case 7: + CPU = "k6"; + break; + case 8: + CPU = "k6-2"; + break; + case 9: + case 13: + CPU = "k6-3"; + break; + case 10: + CPU = "geode"; + break; + } + break; + case 6: + if (testFeature(FEATURE_SSE)) { + CPU = "athlon-xp"; + break; + } + CPU = "athlon"; + break; + case 15: + if (testFeature(FEATURE_SSE3)) { + CPU = "k8-sse3"; + break; + } + CPU = "k8"; + break; case 16: CPU = "amdfam10"; - *Type = AMDFAM10H; + *Type = AMDFAM10H; // "amdfam10" switch (Model) { case 2: *Subtype = AMDFAM10H_BARCELONA; @@ -745,7 +779,7 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family, case 25: CPU = "znver3"; *Type = AMDFAM19H; - if ((Model <= 0x0f) || (Model >= 0x20 && Model <= 0x2f) || + if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x2f) || (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) || (Model >= 0x50 && Model <= 0x5f)) { // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3 @@ -776,6 +810,8 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family, return CPU; } +#undef testFeature + static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, unsigned *Features) { unsigned EAX = 0, EBX = 0; diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp index 24ea82a8c5dfc2..db80eb383885e6 100644 --- a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp @@ -238,11 +238,6 @@ size_t PageSize() { return PageSizeCached; } -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wcast-function-type-mismatch" -#endif - void SetThreadName(std::thread &thread, const std::string &name) { typedef HRESULT(WINAPI * proc)(HANDLE, PCWSTR); HMODULE kbase = GetModuleHandleA("KernelBase.dll"); @@ -260,10 +255,6 @@ void SetThreadName(std::thread &thread, const std::string 
&name) { } } -#if defined(__clang__) -#pragma clang diagnostic pop -#endif - } // namespace fuzzer #endif // LIBFUZZER_WINDOWS diff --git a/compiler-rt/lib/orc/error.h b/compiler-rt/lib/orc/error.h index 4c378ecc01c4e9..b5da0769c63726 100644 --- a/compiler-rt/lib/orc/error.h +++ b/compiler-rt/lib/orc/error.h @@ -1,4 +1,4 @@ -//===-------- Error.h - Enforced error checking for ORC RT ------*- C++ -*-===// +//===-------- error.h - Enforced error checking for ORC RT ------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_win.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_win.cpp index a7ffe0e48fabe7..aae3e76ea229ff 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_win.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_win.cpp @@ -55,11 +55,6 @@ bool TrySymInitialize() { } // namespace -# if defined(__clang__) -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wcast-function-type-mismatch" -# endif - // Initializes DbgHelp library, if it's not yet initialized. Calls to this // function should be synchronized with respect to other calls to DbgHelp API // (e.g. from WinSymbolizerTool). @@ -138,10 +133,6 @@ void InitializeDbgHelpIfNeeded() { } } -# if defined(__clang__) -# pragma clang diagnostic pop -# endif - bool WinSymbolizerTool::SymbolizePC(uptr addr, SymbolizedStack *frame) { InitializeDbgHelpIfNeeded(); diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index ec1fb411ff0e25..53168a920e3c6b 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -25,19 +25,29 @@ namespace fir { class StatementContext; +struct IntrinsicHandlerEntry; -// TODO: Error handling interface ? 
-// TODO: Implementation is incomplete. Many intrinsics to tbd. - -/// Same as the other genIntrinsicCall version above, except that the result -/// deallocation, if required, is not added to a StatementContext. Instead, an -/// extra boolean result indicates if the result must be freed after use. +/// Lower an intrinsic call given the intrinsic \p name, its \p resultType (that +/// must be std::nullopt if and only if this is a subroutine call), and its +/// lowered arguments \p args. The returned pair contains the result value +/// (null mlir::Value for subroutine calls), and a boolean that indicates if +/// this result must be freed after use. std::pair genIntrinsicCall(fir::FirOpBuilder &, mlir::Location, llvm::StringRef name, std::optional resultType, llvm::ArrayRef args, Fortran::lower::AbstractConverter *converter = nullptr); +/// Same as the entry above except that instead of an intrinsic name it takes an +/// IntrinsicHandlerEntry obtained by a previous lookup for a handler to lower +/// this intrinsic (see lookupIntrinsicHandler). +std::pair +genIntrinsicCall(fir::FirOpBuilder &, mlir::Location, + const IntrinsicHandlerEntry &, + std::optional resultType, + llvm::ArrayRef args, + Fortran::lower::AbstractConverter *converter = nullptr); + /// Enums used to templatize and share lowering of MIN and MAX. enum class Extremum { Min, Max }; @@ -156,6 +166,11 @@ struct IntrinsicLibrary { getRuntimeCallGenerator(llvm::StringRef name, mlir::FunctionType soughtFuncType); + /// Helper to generate TODOs for module procedures that must be intercepted in + /// lowering and are not yet implemented. + template + void genModuleProcTODO(llvm::ArrayRef); + void genAbort(llvm::ArrayRef); /// Lowering for the ABS intrinsic. The ABS intrinsic expects one argument in /// the llvm::ArrayRef. 
The ABS intrinsic is lowered into MLIR/FIR operation @@ -676,6 +691,18 @@ static inline mlir::FunctionType genFuncType(mlir::MLIRContext *context, return mlir::FunctionType::get(context, argTypes, {resType}); } +/// Entry into the tables describing how an intrinsic must be lowered. +struct IntrinsicHandlerEntry { + using RuntimeGeneratorRange = + std::pair; + IntrinsicHandlerEntry(const IntrinsicHandler *handler) : entry{handler} { + assert(handler && "handler must not be nullptr"); + }; + IntrinsicHandlerEntry(RuntimeGeneratorRange rt) : entry{rt} {}; + const IntrinsicArgumentLoweringRules *getArgumentLoweringRules() const; + std::variant entry; +}; + //===----------------------------------------------------------------------===// // Helper functions for argument handling. //===----------------------------------------------------------------------===// @@ -728,6 +755,15 @@ mlir::Value genLibSplitComplexArgsCall(fir::FirOpBuilder &builder, mlir::FunctionType libFuncType, llvm::ArrayRef args); +/// Lookup for a handler or runtime call generator to lower intrinsic +/// \p intrinsicName. +std::optional +lookupIntrinsicHandler(fir::FirOpBuilder &, llvm::StringRef intrinsicName, + std::optional resultType); + +/// Generate a TODO error message for an as yet unimplemented intrinsic. +void crashOnMissingIntrinsic(mlir::Location loc, llvm::StringRef name); + /// Return argument lowering rules for an intrinsic. /// Returns a nullptr if all the intrinsic arguments should be lowered by value. 
const IntrinsicArgumentLoweringRules * diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index 5e20f9eee4fc9a..54e29a1d60689e 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -1841,7 +1841,7 @@ static std::optional genCustomIntrinsicRefCore( static std::optional genIntrinsicRefCore(Fortran::lower::PreparedActualArguments &loweredActuals, const Fortran::evaluate::SpecificIntrinsic *intrinsic, - const fir::IntrinsicArgumentLoweringRules *argLowering, + const fir::IntrinsicHandlerEntry &intrinsicEntry, CallContext &callContext) { auto &converter = callContext.converter; if (intrinsic && Fortran::lower::intrinsicRequiresCustomOptionalHandling( @@ -1856,6 +1856,8 @@ genIntrinsicRefCore(Fortran::lower::PreparedActualArguments &loweredActuals, auto &stmtCtx = callContext.stmtCtx; fir::FirOpBuilder &builder = callContext.getBuilder(); mlir::Location loc = callContext.loc; + const fir::IntrinsicArgumentLoweringRules *argLowering = + intrinsicEntry.getArgumentLoweringRules(); for (auto arg : llvm::enumerate(loweredActuals)) { if (!arg.value()) { operands.emplace_back(fir::getAbsentIntrinsicArgument()); @@ -1991,7 +1993,7 @@ genIntrinsicRefCore(Fortran::lower::PreparedActualArguments &loweredActuals, const std::string intrinsicName = callContext.getProcedureName(); // Let the intrinsic library lower the intrinsic procedure call. 
auto [resultExv, mustBeFreed] = genIntrinsicCall( - builder, loc, intrinsicName, scalarResultType, operands, &converter); + builder, loc, intrinsicEntry, scalarResultType, operands, &converter); for (const hlfir::CleanupFunction &fn : cleanupFns) fn(); if (!fir::getBase(resultExv)) @@ -2023,18 +2025,16 @@ genIntrinsicRefCore(Fortran::lower::PreparedActualArguments &loweredActuals, static std::optional genHLFIRIntrinsicRefCore( Fortran::lower::PreparedActualArguments &loweredActuals, const Fortran::evaluate::SpecificIntrinsic *intrinsic, - const fir::IntrinsicArgumentLoweringRules *argLowering, + const fir::IntrinsicHandlerEntry &intrinsicEntry, CallContext &callContext) { - if (!useHlfirIntrinsicOps) - return genIntrinsicRefCore(loweredActuals, intrinsic, argLowering, - callContext); - - fir::FirOpBuilder &builder = callContext.getBuilder(); - mlir::Location loc = callContext.loc; - const std::string intrinsicName = callContext.getProcedureName(); - - // transformational intrinsic ops always have a result type - if (callContext.resultType) { + // Try lowering transformational intrinsic ops to HLFIR ops if enabled + // (transformational always have a result type) + if (useHlfirIntrinsicOps && callContext.resultType) { + fir::FirOpBuilder &builder = callContext.getBuilder(); + mlir::Location loc = callContext.loc; + const std::string intrinsicName = callContext.getProcedureName(); + const fir::IntrinsicArgumentLoweringRules *argLowering = + intrinsicEntry.getArgumentLoweringRules(); std::optional res = Fortran::lower::lowerHlfirIntrinsic(builder, loc, intrinsicName, loweredActuals, argLowering, @@ -2044,7 +2044,7 @@ static std::optional genHLFIRIntrinsicRefCore( } // fallback to calling the intrinsic via fir.call - return genIntrinsicRefCore(loweredActuals, intrinsic, argLowering, + return genIntrinsicRefCore(loweredActuals, intrinsic, intrinsicEntry, callContext); } @@ -2303,13 +2303,13 @@ class ElementalIntrinsicCallBuilder public: ElementalIntrinsicCallBuilder( 
const Fortran::evaluate::SpecificIntrinsic *intrinsic, - const fir::IntrinsicArgumentLoweringRules *argLowering, bool isFunction) - : intrinsic{intrinsic}, argLowering{argLowering}, isFunction{isFunction} { - } + const fir::IntrinsicHandlerEntry &intrinsicEntry, bool isFunction) + : intrinsic{intrinsic}, intrinsicEntry{intrinsicEntry}, + isFunction{isFunction} {} std::optional genElementalKernel(Fortran::lower::PreparedActualArguments &loweredActuals, CallContext &callContext) { - return genHLFIRIntrinsicRefCore(loweredActuals, intrinsic, argLowering, + return genHLFIRIntrinsicRefCore(loweredActuals, intrinsic, intrinsicEntry, callContext); } // Elemental intrinsic functions cannot modify their arguments. @@ -2363,7 +2363,7 @@ class ElementalIntrinsicCallBuilder private: const Fortran::evaluate::SpecificIntrinsic *intrinsic; - const fir::IntrinsicArgumentLoweringRules *argLowering; + fir::IntrinsicHandlerEntry intrinsicEntry; const bool isFunction; }; } // namespace @@ -2436,11 +2436,16 @@ genCustomElementalIntrinsicRef( callContext.procRef, *intrinsic, callContext.resultType, prepareOptionalArg, prepareOtherArg, converter); - const fir::IntrinsicArgumentLoweringRules *argLowering = - fir::getIntrinsicArgumentLowering(callContext.getProcedureName()); + std::optional intrinsicEntry = + fir::lookupIntrinsicHandler(callContext.getBuilder(), + callContext.getProcedureName(), + callContext.resultType); + assert(intrinsicEntry.has_value() && + "intrinsic with custom handling for OPTIONAL arguments must have " + "lowering entries"); // All of the custom intrinsic elementals with custom handling are pure // functions - return ElementalIntrinsicCallBuilder{intrinsic, argLowering, + return ElementalIntrinsicCallBuilder{intrinsic, *intrinsicEntry, /*isFunction=*/true} .genElementalCall(operands, /*isImpure=*/false, callContext); } @@ -2517,21 +2522,15 @@ genCustomIntrinsicRef(const Fortran::evaluate::SpecificIntrinsic *intrinsic, /// lowered as if it were an intrinsic module 
procedure (like C_LOC which is a /// procedure from intrinsic module iso_c_binding). Otherwise, \p intrinsic /// must not be null. + static std::optional genIntrinsicRef(const Fortran::evaluate::SpecificIntrinsic *intrinsic, + const fir::IntrinsicHandlerEntry &intrinsicEntry, CallContext &callContext) { mlir::Location loc = callContext.loc; - auto &converter = callContext.converter; - if (intrinsic && Fortran::lower::intrinsicRequiresCustomOptionalHandling( - callContext.procRef, *intrinsic, converter)) { - if (callContext.isElementalProcWithArrayArgs()) - return genCustomElementalIntrinsicRef(intrinsic, callContext); - return genCustomIntrinsicRef(intrinsic, callContext); - } - Fortran::lower::PreparedActualArguments loweredActuals; const fir::IntrinsicArgumentLoweringRules *argLowering = - fir::getIntrinsicArgumentLowering(callContext.getProcedureName()); + intrinsicEntry.getArgumentLoweringRules(); for (const auto &arg : llvm::enumerate(callContext.procRef.arguments())) { if (!arg.value()) { @@ -2581,12 +2580,12 @@ genIntrinsicRef(const Fortran::evaluate::SpecificIntrinsic *intrinsic, if (callContext.isElementalProcWithArrayArgs()) { // All intrinsic elemental functions are pure. 
const bool isFunction = callContext.resultType.has_value(); - return ElementalIntrinsicCallBuilder{intrinsic, argLowering, isFunction} + return ElementalIntrinsicCallBuilder{intrinsic, intrinsicEntry, isFunction} .genElementalCall(loweredActuals, /*isImpure=*/!isFunction, callContext); } std::optional result = genHLFIRIntrinsicRefCore( - loweredActuals, intrinsic, argLowering, callContext); + loweredActuals, intrinsic, intrinsicEntry, callContext); if (result && mlir::isa(result->getType())) { fir::FirOpBuilder *bldr = &callContext.getBuilder(); callContext.stmtCtx.attachCleanup( @@ -2595,18 +2594,43 @@ genIntrinsicRef(const Fortran::evaluate::SpecificIntrinsic *intrinsic, return result; } +static std::optional +genIntrinsicRef(const Fortran::evaluate::SpecificIntrinsic *intrinsic, + CallContext &callContext) { + mlir::Location loc = callContext.loc; + auto &converter = callContext.converter; + if (intrinsic && Fortran::lower::intrinsicRequiresCustomOptionalHandling( + callContext.procRef, *intrinsic, converter)) { + if (callContext.isElementalProcWithArrayArgs()) + return genCustomElementalIntrinsicRef(intrinsic, callContext); + return genCustomIntrinsicRef(intrinsic, callContext); + } + std::optional intrinsicEntry = + fir::lookupIntrinsicHandler(callContext.getBuilder(), + callContext.getProcedureName(), + callContext.resultType); + if (!intrinsicEntry) + fir::crashOnMissingIntrinsic(loc, callContext.getProcedureName()); + return genIntrinsicRef(intrinsic, *intrinsicEntry, callContext); +} + /// Main entry point to lower procedure references, regardless of what they are. 
static std::optional genProcedureRef(CallContext &callContext) { mlir::Location loc = callContext.loc; + fir::FirOpBuilder &builder = callContext.getBuilder(); if (auto *intrinsic = callContext.procRef.proc().GetSpecificIntrinsic()) return genIntrinsicRef(intrinsic, callContext); - // If it is an intrinsic module procedure reference - then treat as - // intrinsic unless it is bind(c) (since implementation is external from - // module). + // Intercept non BIND(C) module procedure reference that have lowering + // handlers defined for there name. Otherwise, lower them as user + // procedure calls and expect the implementation to be part of + // runtime libraries with the proper name mangling. if (Fortran::lower::isIntrinsicModuleProcRef(callContext.procRef) && !callContext.isBindcCall()) - return genIntrinsicRef(nullptr, callContext); + if (std::optional intrinsicEntry = + fir::lookupIntrinsicHandler(builder, callContext.getProcedureName(), + callContext.resultType)) + return genIntrinsicRef(nullptr, *intrinsicEntry, callContext); if (callContext.isStatementFunctionCall()) return genStmtFunctionRef(loc, callContext.converter, callContext.symMap, @@ -2641,7 +2665,6 @@ genProcedureRef(CallContext &callContext) { // TYPE(*) cannot be ALLOCATABLE/POINTER (C709) so there is no // need to cover the case of passing an ALLOCATABLE/POINTER to an // OPTIONAL. - fir::FirOpBuilder &builder = callContext.getBuilder(); isPresent = builder.create(loc, builder.getI1Type(), actual) .getResult(); diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index a1cef7437fa2d5..f4541bf30676a6 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -95,6 +95,17 @@ static bool isStaticallyPresent(const fir::ExtendedValue &exv) { return !isStaticallyAbsent(exv); } +/// IEEE module procedure names not yet implemented for genModuleProcTODO. 
+static constexpr char ieee_int[] = "ieee_int"; +static constexpr char ieee_get_underflow_mode[] = "ieee_get_underflow_mode"; +static constexpr char ieee_next_after[] = "ieee_next_after"; +static constexpr char ieee_next_down[] = "ieee_next_down"; +static constexpr char ieee_next_up[] = "ieee_next_up"; +static constexpr char ieee_real[] = "ieee_real"; +static constexpr char ieee_rem[] = "ieee_rem"; +static constexpr char ieee_rint[] = "ieee_rint"; +static constexpr char ieee_set_underflow_mode[] = "ieee_set_underflow_mode"; + using I = IntrinsicLibrary; /// Flag to indicate that an intrinsic argument has to be handled as @@ -321,6 +332,8 @@ static constexpr IntrinsicHandler handlers[]{ {"radix", asValue, handleDynamicOptional}}}, /*isElemental=*/false}, {"ieee_get_status", &I::genIeeeGetOrSetStatus}, + {"ieee_get_underflow_mode", &I::genModuleProcTODO}, + {"ieee_int", &I::genModuleProcTODO}, {"ieee_is_finite", &I::genIeeeIsFinite}, {"ieee_is_nan", &I::genIeeeIsNan}, {"ieee_is_negative", &I::genIeeeIsNegative}, @@ -342,12 +355,18 @@ static constexpr IntrinsicHandler handlers[]{ &I::genIeeeMaxMin}, {"ieee_min_num_mag", &I::genIeeeMaxMin}, + {"ieee_next_after", &I::genModuleProcTODO}, + {"ieee_next_down", &I::genModuleProcTODO}, + {"ieee_next_up", &I::genModuleProcTODO}, {"ieee_quiet_eq", &I::genIeeeQuietCompare}, {"ieee_quiet_ge", &I::genIeeeQuietCompare}, {"ieee_quiet_gt", &I::genIeeeQuietCompare}, {"ieee_quiet_le", &I::genIeeeQuietCompare}, {"ieee_quiet_lt", &I::genIeeeQuietCompare}, {"ieee_quiet_ne", &I::genIeeeQuietCompare}, + {"ieee_real", &I::genModuleProcTODO}, + {"ieee_rem", &I::genModuleProcTODO}, + {"ieee_rint", &I::genModuleProcTODO}, {"ieee_round_eq", &I::genIeeeTypeCompare}, {"ieee_round_ne", &I::genIeeeTypeCompare}, {"ieee_set_flag", &I::genIeeeSetFlagOrHaltingMode}, @@ -360,6 +379,7 @@ static constexpr IntrinsicHandler handlers[]{ {"radix", asValue, handleDynamicOptional}}}, /*isElemental=*/false}, {"ieee_set_status", &I::genIeeeGetOrSetStatus}, + 
{"ieee_set_underflow_mode", &I::genModuleProcTODO}, {"ieee_signaling_eq", &I::genIeeeSignalingCompare}, {"ieee_signaling_ge", @@ -1493,17 +1513,11 @@ static_assert(mathOps.Verify() && "map must be sorted"); /// \p bestMatchDistance specifies the FunctionDistance between /// the requested operation and the non-exact match. static const MathOperation * -searchMathOperation(fir::FirOpBuilder &builder, llvm::StringRef name, +searchMathOperation(fir::FirOpBuilder &builder, + const IntrinsicHandlerEntry::RuntimeGeneratorRange &range, mlir::FunctionType funcType, const MathOperation **bestNearMatch, FunctionDistance &bestMatchDistance) { - auto range = mathOps.equal_range(name); - auto mod = builder.getModule(); - - // Search ppcMathOps only if targetting PowerPC arch - if (fir::getTargetTriple(mod).isPPC() && range.first == range.second) { - range = checkPPCMathOperationsRange(name); - } for (auto iter = range.first; iter != range.second && iter; ++iter) { const auto &impl = *iter; auto implType = impl.typeGenerator(builder.getContext(), builder); @@ -1649,8 +1663,46 @@ llvm::StringRef genericName(llvm::StringRef specificName) { return name.drop_back(name.size() - size); } +std::optional +lookupRuntimeGenerator(llvm::StringRef name, bool isPPCTarget) { + if (auto range = mathOps.equal_range(name); range.first != range.second) + return std::make_optional( + range); + // Search ppcMathOps only if targetting PowerPC arch + if (isPPCTarget) + if (auto range = checkPPCMathOperationsRange(name); + range.first != range.second) + return std::make_optional( + range); + return std::nullopt; +} + +std::optional +lookupIntrinsicHandler(fir::FirOpBuilder &builder, + llvm::StringRef intrinsicName, + std::optional resultType) { + llvm::StringRef name = genericName(intrinsicName); + if (const IntrinsicHandler *handler = findIntrinsicHandler(name)) + return std::make_optional(handler); + bool isPPCTarget = fir::getTargetTriple(builder.getModule()).isPPC(); + // If targeting PowerPC, 
check PPC intrinsic handlers. + if (isPPCTarget) + if (const IntrinsicHandler *ppcHandler = findPPCIntrinsicHandler(name)) + return std::make_optional(ppcHandler); + // Subroutines should have a handler. + if (!resultType) + return std::nullopt; + // Try the runtime if no special handler was defined for the + // intrinsic being called. Maths runtime only has numerical elemental. + if (auto runtimeGeneratorRange = lookupRuntimeGenerator(name, isPPCTarget)) + return std::make_optional(*runtimeGeneratorRange); + return std::nullopt; +} + /// Generate a TODO error message for an as yet unimplemented intrinsic. -void crashOnMissingIntrinsic(mlir::Location loc, llvm::StringRef name) { +void crashOnMissingIntrinsic(mlir::Location loc, + llvm::StringRef intrinsicName) { + llvm::StringRef name = genericName(intrinsicName); if (isIntrinsicModuleProcedure(name)) TODO(loc, "intrinsic module procedure: " + llvm::Twine(name)); else if (isCoarrayIntrinsic(name)) @@ -1782,46 +1834,33 @@ invokeHandler(IntrinsicLibrary::DualGenerator generator, return std::invoke(generator, lib, resultType, args); } -std::pair -IntrinsicLibrary::genIntrinsicCall(llvm::StringRef specificName, - std::optional resultType, - llvm::ArrayRef args) { - llvm::StringRef name = genericName(specificName); - if (const IntrinsicHandler *handler = findIntrinsicHandler(name)) { - bool outline = handler->outline || outlineAllIntrinsics; - return {Fortran::common::visit( - [&](auto &generator) -> fir::ExtendedValue { - return invokeHandler(generator, *handler, resultType, args, - outline, *this); - }, - handler->generator), - this->resultMustBeFreed}; - } - - // If targeting PowerPC, check PPC intrinsic handlers. 
- auto mod = builder.getModule(); - if (fir::getTargetTriple(mod).isPPC()) { - if (const IntrinsicHandler *ppcHandler = findPPCIntrinsicHandler(name)) { - bool outline = ppcHandler->outline || outlineAllIntrinsics; - return {Fortran::common::visit( - [&](auto &generator) -> fir::ExtendedValue { - return invokeHandler(generator, *ppcHandler, resultType, - args, outline, *this); - }, - ppcHandler->generator), - this->resultMustBeFreed}; - } - } - - // Try the runtime if no special handler was defined for the - // intrinsic being called. Maths runtime only has numerical elemental. - // No optional arguments are expected at this point, the code will - // crash if it gets absent optional. +static std::pair genIntrinsicCallHelper( + const IntrinsicHandler *handler, std::optional resultType, + llvm::ArrayRef args, IntrinsicLibrary &lib) { + assert(handler && "must be set"); + bool outline = handler->outline || outlineAllIntrinsics; + return {Fortran::common::visit( + [&](auto &generator) -> fir::ExtendedValue { + return invokeHandler(generator, *handler, resultType, args, + outline, lib); + }, + handler->generator), + lib.resultMustBeFreed}; +} - if (!resultType) - // Subroutine should have a handler, they are likely missing for now. - crashOnMissingIntrinsic(loc, name); +static IntrinsicLibrary::RuntimeCallGenerator getRuntimeCallGeneratorHelper( + const IntrinsicHandlerEntry::RuntimeGeneratorRange &, mlir::FunctionType, + fir::FirOpBuilder &, mlir::Location); +static std::pair genIntrinsicCallHelper( + const IntrinsicHandlerEntry::RuntimeGeneratorRange &range, + std::optional resultType, + llvm::ArrayRef args, IntrinsicLibrary &lib) { + assert(resultType.has_value() && "RuntimeGenerator are for functions only"); + assert(range.first != nullptr && "range should not be empty"); + fir::FirOpBuilder &builder = lib.builder; + mlir::Location loc = lib.loc; + llvm::StringRef name = range.first->key; // FIXME: using toValue to get the type won't work with array arguments. 
llvm::SmallVector mlirArgs; for (const fir::ExtendedValue &extendedVal : args) { @@ -1836,10 +1875,39 @@ IntrinsicLibrary::genIntrinsicCall(llvm::StringRef specificName, getFunctionType(*resultType, mlirArgs, builder); IntrinsicLibrary::RuntimeCallGenerator runtimeCallGenerator = - getRuntimeCallGenerator(name, soughtFuncType); - return {genElementalCall(runtimeCallGenerator, name, *resultType, args, - /*outline=*/outlineAllIntrinsics), - resultMustBeFreed}; + getRuntimeCallGeneratorHelper(range, soughtFuncType, builder, loc); + return {lib.genElementalCall(runtimeCallGenerator, name, *resultType, args, + /*outline=*/outlineAllIntrinsics), + lib.resultMustBeFreed}; +} + +std::pair +genIntrinsicCall(fir::FirOpBuilder &builder, mlir::Location loc, + const IntrinsicHandlerEntry &intrinsic, + std::optional resultType, + llvm::ArrayRef args, + Fortran::lower::AbstractConverter *converter) { + IntrinsicLibrary library{builder, loc, converter}; + return std::visit( + [&](auto handler) -> auto { + return genIntrinsicCallHelper(handler, resultType, args, library); + }, + intrinsic.entry); +} + +std::pair +IntrinsicLibrary::genIntrinsicCall(llvm::StringRef specificName, + std::optional resultType, + llvm::ArrayRef args) { + std::optional intrinsic = + lookupIntrinsicHandler(builder, specificName, resultType); + if (!intrinsic.has_value()) + crashOnMissingIntrinsic(loc, specificName); + return std::visit( + [&](auto handler) -> auto { + return genIntrinsicCallHelper(handler, resultType, args, *this); + }, + intrinsic->entry); } mlir::Value @@ -2082,19 +2150,19 @@ fir::ExtendedValue IntrinsicLibrary::outlineInExtendedWrapper( return mlir::Value{}; } -IntrinsicLibrary::RuntimeCallGenerator -IntrinsicLibrary::getRuntimeCallGenerator(llvm::StringRef name, - mlir::FunctionType soughtFuncType) { - mlir::FunctionType actualFuncType; - const MathOperation *mathOp = nullptr; - +static IntrinsicLibrary::RuntimeCallGenerator getRuntimeCallGeneratorHelper( + const 
IntrinsicHandlerEntry::RuntimeGeneratorRange &range, + mlir::FunctionType soughtFuncType, fir::FirOpBuilder &builder, + mlir::Location loc) { + assert(range.first != nullptr && "range should not be empty"); + llvm::StringRef name = range.first->key; // Look for a dedicated math operation generator, which // normally produces a single MLIR operation implementing // the math operation. const MathOperation *bestNearMatch = nullptr; FunctionDistance bestMatchDistance; - mathOp = searchMathOperation(builder, name, soughtFuncType, &bestNearMatch, - bestMatchDistance); + const MathOperation *mathOp = searchMathOperation( + builder, range, soughtFuncType, &bestNearMatch, bestMatchDistance); if (!mathOp && bestNearMatch) { // Use the best near match, optionally issuing an error, // if types conversions cause precision loss. @@ -2109,7 +2177,8 @@ IntrinsicLibrary::getRuntimeCallGenerator(llvm::StringRef name, crashOnMissingIntrinsic(loc, nameAndType); } - actualFuncType = mathOp->typeGenerator(builder.getContext(), builder); + mlir::FunctionType actualFuncType = + mathOp->typeGenerator(builder.getContext(), builder); assert(actualFuncType.getNumResults() == soughtFuncType.getNumResults() && actualFuncType.getNumInputs() == soughtFuncType.getNumInputs() && @@ -2128,6 +2197,17 @@ IntrinsicLibrary::getRuntimeCallGenerator(llvm::StringRef name, }; } +IntrinsicLibrary::RuntimeCallGenerator +IntrinsicLibrary::getRuntimeCallGenerator(llvm::StringRef name, + mlir::FunctionType soughtFuncType) { + bool isPPCTarget = fir::getTargetTriple(builder.getModule()).isPPC(); + std::optional range = + lookupRuntimeGenerator(name, isPPCTarget); + if (!range.has_value()) + crashOnMissingIntrinsic(loc, name); + return getRuntimeCallGeneratorHelper(*range, soughtFuncType, builder, loc); +} + mlir::SymbolRefAttr IntrinsicLibrary::getUnrestrictedIntrinsicSymbolRefAttr( llvm::StringRef name, mlir::FunctionType signature) { // Unrestricted intrinsics signature follows implicit rules: argument @@ 
-2214,6 +2294,12 @@ mlir::Value IntrinsicLibrary::genConversion(mlir::Type resultType, return builder.convertWithSemantics(loc, resultType, args[0]); } +template +void IntrinsicLibrary::genModuleProcTODO( + llvm::ArrayRef args) { + crashOnMissingIntrinsic(loc, intrinsicName); +} + // ABORT void IntrinsicLibrary::genAbort(llvm::ArrayRef args) { assert(args.size() == 0); @@ -7076,6 +7162,17 @@ getIntrinsicArgumentLowering(llvm::StringRef specificName) { return nullptr; } +const IntrinsicArgumentLoweringRules * +IntrinsicHandlerEntry::getArgumentLoweringRules() const { + if (const IntrinsicHandler *const *handler = + std::get_if(&entry)) { + assert(*handler); + if (!(*handler)->argLoweringRules.hasDefaultRules()) + return &(*handler)->argLoweringRules; + } + return nullptr; +} + /// Return how argument \p argName should be lowered given the rules for the /// intrinsic function. fir::ArgLoweringRule diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir index 8b62787bb30942..eca762d52a7241 100644 --- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir @@ -200,6 +200,7 @@ func.func @_QPsimd1(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.ref fir.store %3 to %6 : !fir.ref omp.yield } + omp.terminator } omp.terminator } @@ -225,6 +226,7 @@ func.func @_QPsimd1(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.ref // CHECK: llvm.store %[[I1]], %[[ARR_I_REF]] : i32, !llvm.ptr // CHECK: omp.yield // CHECK: } +// CHECK: omp.terminator // CHECK: } // CHECK: omp.terminator // CHECK: } @@ -518,6 +520,7 @@ func.func @_QPsimd_with_nested_loop() { fir.store %7 to %3 : !fir.ref omp.yield } + omp.terminator } return } @@ -538,6 +541,7 @@ func.func @_QPsimd_with_nested_loop() { // CHECK: ^bb3: // CHECK: omp.yield // CHECK: } +// CHECK: omp.terminator // CHECK: } // CHECK: llvm.return // CHECK: } diff --git a/libc/benchmarks/gpu/LibcGpuBenchmark.cpp 
b/libc/benchmarks/gpu/LibcGpuBenchmark.cpp index 69adb0c95ba768..7f60c9cc4a2f41 100644 --- a/libc/benchmarks/gpu/LibcGpuBenchmark.cpp +++ b/libc/benchmarks/gpu/LibcGpuBenchmark.cpp @@ -17,7 +17,8 @@ void Benchmark::add_benchmark(Benchmark *benchmark) { benchmarks.push_back(benchmark); } -BenchmarkResult reduce_results(cpp::array &results) { +BenchmarkResult +reduce_results(const cpp::array &results) { BenchmarkResult result; uint64_t cycles_sum = 0; double standard_deviation_sum = 0; @@ -51,16 +52,16 @@ void Benchmark::run_benchmarks() { uint64_t id = gpu::get_thread_id(); gpu::sync_threads(); - for (Benchmark *benchmark : benchmarks) - results[id] = benchmark->run(); + for (Benchmark *b : benchmarks) + results[id] = b->run(); gpu::sync_threads(); if (id == 0) { - for (Benchmark *benchmark : benchmarks) { + for (Benchmark const *b : benchmarks) { BenchmarkResult all_results = reduce_results(results); constexpr auto GREEN = "\033[32m"; constexpr auto RESET = "\033[0m"; - log << GREEN << "[ RUN ] " << RESET << benchmark->get_name() << '\n'; - log << GREEN << "[ OK ] " << RESET << benchmark->get_name() << ": " + log << GREEN << "[ RUN ] " << RESET << b->get_name() << '\n'; + log << GREEN << "[ OK ] " << RESET << b->get_name() << ": " << all_results.cycles << " cycles, " << all_results.min << " min, " << all_results.max << " max, " << all_results.total_iterations << " iterations, " << all_results.total_time << " ns, " @@ -82,7 +83,6 @@ BenchmarkResult benchmark(const BenchmarkOptions &options, uint32_t samples = 0; uint64_t total_time = 0; uint64_t best_guess = 0; - uint64_t total_cycles = 0; uint64_t cycles_squared = 0; uint64_t min = UINT64_MAX; uint64_t max = 0; @@ -92,15 +92,15 @@ BenchmarkResult benchmark(const BenchmarkOptions &options, for (int i = 0; i < overhead_iterations; i++) overhead = cpp::min(overhead, LIBC_NAMESPACE::overhead()); - for (uint64_t time_budget = options.max_duration; time_budget >= 0;) { + for (int64_t time_budget = options.max_duration; 
time_budget >= 0;) { uint64_t sample_cycles = 0; const clock_t start = static_cast(clock()); for (uint32_t i = 0; i < iterations; i++) { auto wrapper_intermediate = wrapper_func(); - uint64_t result = wrapper_intermediate - overhead; - max = cpp::max(max, result); - min = cpp::min(min, result); - sample_cycles += result; + uint64_t current_result = wrapper_intermediate - overhead; + max = cpp::max(max, current_result); + min = cpp::min(min, current_result); + sample_cycles += current_result; } const clock_t end = clock(); const clock_t duration_ns = @@ -108,7 +108,6 @@ BenchmarkResult benchmark(const BenchmarkOptions &options, total_time += duration_ns; time_budget -= duration_ns; samples++; - total_cycles += sample_cycles; cycles_squared += sample_cycles * sample_cycles; total_iterations += iterations; diff --git a/libc/benchmarks/gpu/LibcGpuBenchmark.h b/libc/benchmarks/gpu/LibcGpuBenchmark.h index 59dd5894620808..ffc858911b1c0a 100644 --- a/libc/benchmarks/gpu/LibcGpuBenchmark.h +++ b/libc/benchmarks/gpu/LibcGpuBenchmark.h @@ -19,8 +19,8 @@ struct BenchmarkOptions { uint32_t max_iterations = 10000000; uint32_t min_samples = 4; uint32_t max_samples = 1000; - uint64_t min_duration = 0; // in nanoseconds (ns) - uint64_t max_duration = 1000 * 1000 * 1000; // 1e9 nanoseconds = 1 second + int64_t min_duration = 0; // in nanoseconds (ns) + int64_t max_duration = 1000 * 1000 * 1000; // 1e9 nanoseconds = 1 second double epsilon = 0.01; double scaling_factor = 1.4; }; diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake index 6d38bb491044e5..58813f50d101cf 100644 --- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake +++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake @@ -104,6 +104,9 @@ function(_get_common_compile_options output_var flags) list(APPEND compile_options "-Wthread-safety") list(APPEND compile_options "-Wglobal-constructors") endif() + if(LIBC_CONF_MATH_OPTIMIZATIONS) + list(APPEND 
compile_options "-DLIBC_MATH=${LIBC_CONF_MATH_OPTIMIZATIONS}") + endif() elseif(MSVC) list(APPEND compile_options "/EHs-c-") list(APPEND compile_options "/GR-") diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt index 9651939454ccc5..7705e971aed3e0 100644 --- a/libc/config/baremetal/arm/entrypoints.txt +++ b/libc/config/baremetal/arm/entrypoints.txt @@ -165,6 +165,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdbit.stdc_trailing_zeros_us # stdlib.h entrypoints + libc.src.stdlib._Exit libc.src.stdlib.abort libc.src.stdlib.abs libc.src.stdlib.aligned_alloc @@ -231,6 +232,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.canonicalize libc.src.math.canonicalizef libc.src.math.canonicalizel + libc.src.math.cbrtf libc.src.math.ceil libc.src.math.ceilf libc.src.math.ceill @@ -386,6 +388,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.sqrt libc.src.math.sqrtf libc.src.math.sqrtl + libc.src.math.tan libc.src.math.tanf libc.src.math.tanhf libc.src.math.trunc diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt index 8b37acef259c4f..4d020fd8a17589 100644 --- a/libc/config/baremetal/riscv/entrypoints.txt +++ b/libc/config/baremetal/riscv/entrypoints.txt @@ -161,6 +161,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdbit.stdc_trailing_zeros_us # stdlib.h entrypoints + libc.src.stdlib._Exit libc.src.stdlib.abort libc.src.stdlib.abs libc.src.stdlib.aligned_alloc @@ -227,6 +228,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.canonicalize libc.src.math.canonicalizef libc.src.math.canonicalizel + libc.src.math.cbrtf libc.src.math.ceil libc.src.math.ceilf libc.src.math.ceill @@ -381,6 +383,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.sqrt libc.src.math.sqrtf libc.src.math.sqrtl + libc.src.math.tan libc.src.math.tanf libc.src.math.tanhf libc.src.math.trunc diff --git a/libc/config/config.json b/libc/config/config.json index 11433c15d762e6..e8feab20175f4a 100644 --- a/libc/config/config.json +++ 
b/libc/config/config.json @@ -21,6 +21,16 @@ "doc": "Disable printing fixed point values in printf and friends." } }, + "scanf": { + "LIBC_CONF_SCANF_DISABLE_FLOAT": { + "value": false, + "doc": "Disable parsing floating point values in scanf and friends." + }, + "LIBC_CONF_SCANF_DISABLE_INDEX_MODE": { + "value": false, + "doc": "Disable index mode in the scanf format string." + } + }, "string": { "LIBC_CONF_STRING_UNSAFE_WIDE_READ": { "value": false, @@ -60,5 +70,11 @@ "value": 1073741824, "doc": "Default size for the constinit freelist buffer used for the freelist malloc implementation (default 1o 1GB)." } + }, + "math": { + "LIBC_CONF_MATH_OPTIMIZATIONS": { + "value": 0, + "doc": "Configures optimizations for math functions. Values accepted are LIBC_MATH_SKIP_ACCURATE_PASS, LIBC_MATH_SMALL_TABLES, LIBC_MATH_NO_ERRNO, LIBC_MATH_NO_EXCEPT, and LIBC_MATH_FAST." + } } } diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt index 9eb7d8960c6e49..383118dc781e55 100644 --- a/libc/config/darwin/arm/entrypoints.txt +++ b/libc/config/darwin/arm/entrypoints.txt @@ -123,6 +123,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.atan2f libc.src.math.atanf libc.src.math.atanhf + libc.src.math.cbrtf libc.src.math.copysign libc.src.math.copysignf libc.src.math.copysignl diff --git a/libc/config/gpu/config.json b/libc/config/gpu/config.json index 53f232e31cc8a4..71107d26ea7ab3 100644 --- a/libc/config/gpu/config.json +++ b/libc/config/gpu/config.json @@ -12,5 +12,18 @@ "LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE": { "value": false } + }, + "scanf": { + "LIBC_CONF_SCANF_DISABLE_FLOAT": { + "value": true + }, + "LIBC_CONF_SCANF_DISABLE_INDEX_MODE": { + "value": true + } + }, + "math": { + "LIBC_CONF_MATH_OPTIMIZATIONS": { + "value": "(LIBC_MATH_SKIP_ACCURATE_PASS | LIBC_MATH_SMALL_TABLES | LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT)" + } } } diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt index 
c8d68d61f3212d..62f3f0df247ccc 100644 --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/entrypoints.txt @@ -242,6 +242,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.atanf libc.src.math.atanh libc.src.math.atanhf + libc.src.math.cbrtf libc.src.math.ceil libc.src.math.ceilf libc.src.math.copysign diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index a6aeb0685bca48..c2b37ff1ceec1c 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -17,6 +17,12 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.ctype.tolower libc.src.ctype.toupper + # dlfcn.h entrypoints + libc.src.dlfcn.dlclose + libc.src.dlfcn.dlerror + libc.src.dlfcn.dlopen + libc.src.dlfcn.dlsym + # errno.h entrypoints libc.src.errno.errno @@ -284,6 +290,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.unistd.dup3 libc.src.unistd.execve libc.src.unistd.fchdir + libc.src.unistd.fpathconf libc.src.unistd.fsync libc.src.unistd.ftruncate libc.src.unistd.getcwd @@ -295,6 +302,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.unistd.link libc.src.unistd.linkat libc.src.unistd.lseek + libc.src.unistd.pathconf libc.src.unistd.pread libc.src.unistd.pwrite libc.src.unistd.read @@ -337,6 +345,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.atan2f libc.src.math.atanf libc.src.math.atanhf + libc.src.math.cbrtf libc.src.math.ceil libc.src.math.ceilf libc.src.math.ceill diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt index a24514e29334d7..b0ee0e989b5edd 100644 --- a/libc/config/linux/arm/entrypoints.txt +++ b/libc/config/linux/arm/entrypoints.txt @@ -216,6 +216,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.atan2f libc.src.math.atanf libc.src.math.atanhf + libc.src.math.cbrtf libc.src.math.ceil libc.src.math.ceilf libc.src.math.ceill diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index 2b7e3d0256fc3a..f7589c6a97a2ea 100644 --- 
a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -289,6 +289,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.unistd.dup3 libc.src.unistd.execve libc.src.unistd.fchdir + libc.src.unistd.fpathconf libc.src.unistd.fsync libc.src.unistd.ftruncate libc.src.unistd.getcwd @@ -300,6 +301,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.unistd.link libc.src.unistd.linkat libc.src.unistd.lseek + libc.src.unistd.pathconf libc.src.unistd.pread libc.src.unistd.pwrite libc.src.unistd.read @@ -345,6 +347,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.atan2f libc.src.math.atanf libc.src.math.atanhf + libc.src.math.cbrtf libc.src.math.ceil libc.src.math.ceilf libc.src.math.ceill diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 271763d8fe869a..c540f50a17bc8f 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -17,6 +17,12 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.ctype.tolower libc.src.ctype.toupper + # dlfcn.h entrypoints + libc.src.dlfcn.dlclose + libc.src.dlfcn.dlerror + libc.src.dlfcn.dlopen + libc.src.dlfcn.dlsym + # errno.h entrypoints libc.src.errno.errno @@ -302,6 +308,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.unistd.dup3 libc.src.unistd.execve libc.src.unistd.fchdir + libc.src.unistd.fpathconf libc.src.unistd.fsync libc.src.unistd.ftruncate libc.src.unistd.getcwd @@ -313,6 +320,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.unistd.link libc.src.unistd.linkat libc.src.unistd.lseek + libc.src.unistd.pathconf libc.src.unistd.pipe libc.src.unistd.pread libc.src.unistd.pwrite @@ -362,6 +370,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.canonicalize libc.src.math.canonicalizef libc.src.math.canonicalizel + libc.src.math.cbrtf libc.src.math.ceil libc.src.math.ceilf libc.src.math.ceill diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt index b33d572cf999ac..499c6bfe3a229c 100644 --- a/libc/config/windows/entrypoints.txt 
+++ b/libc/config/windows/entrypoints.txt @@ -121,6 +121,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.atan2f libc.src.math.atanf libc.src.math.atanhf + libc.src.math.cbrtf libc.src.math.copysign libc.src.math.copysignf libc.src.math.copysignl diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst index 016e2e5aa5876e..9c641ef94570f4 100644 --- a/libc/docs/configure.rst +++ b/libc/docs/configure.rst @@ -30,6 +30,8 @@ to learn about the defaults for your platform and target. - ``LIBC_CONF_KEEP_FRAME_POINTER``: Keep frame pointer in functions for better debugging experience. * **"malloc" options** - ``LIBC_CONF_FREELIST_MALLOC_BUFFER_SIZE``: Default size for the constinit freelist buffer used for the freelist malloc implementation (default 1o 1GB). +* **"math" options** + - ``LIBC_CONF_MATH_OPTIMIZATIONS``: Configures optimizations for math functions. Values accepted are LIBC_MATH_SKIP_ACCURATE_PASS, LIBC_MATH_SMALL_TABLES, LIBC_MATH_NO_ERRNO, LIBC_MATH_NO_EXCEPT, and LIBC_MATH_FAST. * **"printf" options** - ``LIBC_CONF_PRINTF_DISABLE_FIXED_POINT``: Disable printing fixed point values in printf and friends. - ``LIBC_CONF_PRINTF_DISABLE_FLOAT``: Disable printing floating point values in printf and friends. @@ -40,6 +42,9 @@ to learn about the defaults for your platform and target. - ``LIBC_CONF_RAW_MUTEX_DEFAULT_SPIN_COUNT``: Default number of spins before blocking if a mutex is in contention (default to 100). - ``LIBC_CONF_RWLOCK_DEFAULT_SPIN_COUNT``: Default number of spins before blocking if a rwlock is in contention (default to 100). - ``LIBC_CONF_TIMEOUT_ENSURE_MONOTONICITY``: Automatically adjust timeout to CLOCK_MONOTONIC (default to true). POSIX API may require CLOCK_REALTIME, which can be unstable and leading to unexpected behavior. This option will convert the real-time timestamp to monotonic timestamp relative to the time of call. 
+* **"scanf" options** + - ``LIBC_CONF_SCANF_DISABLE_FLOAT``: Disable parsing floating point values in scanf and friends. + - ``LIBC_CONF_SCANF_DISABLE_INDEX_MODE``: Disable index mode in the scanf format string. * **"string" options** - ``LIBC_CONF_MEMSET_X86_USE_SOFTWARE_PREFETCHING``: Inserts prefetch for write instructions (PREFETCHW) for memset on x86 to recover performance when hardware prefetcher is disabled. - ``LIBC_CONF_STRING_UNSAFE_WIDE_READ``: Read more than a byte at a time to perform byte-string operations like strlen. diff --git a/libc/docs/dev/undefined_behavior.rst b/libc/docs/dev/undefined_behavior.rst index c97a539ca8da45..3faae3134ce2a4 100644 --- a/libc/docs/dev/undefined_behavior.rst +++ b/libc/docs/dev/undefined_behavior.rst @@ -89,3 +89,7 @@ The C23 standard states that if the value of the ``rnd`` argument of the the value of a math rounding direction macro, the direction of rounding is unspecified. LLVM's libc chooses to use the ``FP_INT_TONEAREST`` rounding direction in this case. + +Non-const Constant Return Values +-------------------------------- +Some libc functions, like ``dlerror()``, return ``char *`` instead of ``const char *`` and then tell the caller they promise not to modify this value. Any modification of this value is undefined behavior. 
diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index 422acfcdd4cec2..7914a3d7e6d1a9 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -266,7 +266,7 @@ Higher Math Functions +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | atanpi | | | | | | 7.12.4.10 | F.10.1.10 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| cbrt | | | | | | 7.12.7.1 | F.10.4.1 | +| cbrt | |check| | | | | | 7.12.7.1 | F.10.4.1 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | compoundn | | | | | | 7.12.7.2 | F.10.4.2 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ diff --git a/libc/hdr/CMakeLists.txt b/libc/hdr/CMakeLists.txt index 66b82c84dac499..1303280c2c5ef8 100644 --- a/libc/hdr/CMakeLists.txt +++ b/libc/hdr/CMakeLists.txt @@ -78,6 +78,24 @@ add_proxy_header_library( libc.include.llvm-libc-macros.sys_epoll_macros ) +add_proxy_header_library( + sys_stat_macros + HDRS + sys_stat_macros.h + FULL_BUILD_DEPENDS + libc.include.sys_stat + libc.include.llvm-libc-macros.sys_stat_macros +) + +add_proxy_header_library( + unistd_macros + HDRS + unistd_macros.h + FULL_BUILD_DEPENDS + libc.include.unistd + libc.include.llvm-libc-macros.unistd_macros +) + add_proxy_header_library( time_macros HDRS @@ -97,4 +115,13 @@ add_proxy_header_library( libc.include.float ) +add_proxy_header_library( + limits_macros + HDRS + limits_macros.h + FULL_BUILD_DEPENDS + libc.include.limits + libc.include.llvm-libc-macros.limits_macros +) + add_subdirectory(types) 
diff --git a/libc/hdr/limits_macros.h b/libc/hdr/limits_macros.h new file mode 100644 index 00000000000000..2dc13b0cca60df --- /dev/null +++ b/libc/hdr/limits_macros.h @@ -0,0 +1,22 @@ +//===-- Definition of macros from limits.h --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_HDR_LIMITS_MACROS_H +#define LLVM_LIBC_HDR_LIMITS_MACROS_H + +#ifdef LIBC_FULL_BUILD + +#include "include/llvm-libc-macros/limits-macros.h" + +#else // Overlay mode + +#include + +#endif // LLVM_LIBC_FULL_BUILD + +#endif // LLVM_LIBC_HDR_LIMITS_MACROS_H diff --git a/libc/hdr/math_macros.h b/libc/hdr/math_macros.h index d13c5ff7647ad2..863451123f3f80 100644 --- a/libc/hdr/math_macros.h +++ b/libc/hdr/math_macros.h @@ -15,6 +15,11 @@ #else // Overlay mode +// GCC will include CXX headers when __cplusplus is defined. This behavior +// can be suppressed by defining _GLIBCXX_INCLUDE_NEXT_C_HEADERS. +#if defined(__GNUC__) && !defined(__clang__) +#define _GLIBCXX_INCLUDE_NEXT_C_HEADERS +#endif #include // Some older math.h header does not have FP_INT_* constants yet. diff --git a/libc/hdr/sys_stat_macros.h b/libc/hdr/sys_stat_macros.h new file mode 100644 index 00000000000000..cb58d62e1ffb72 --- /dev/null +++ b/libc/hdr/sys_stat_macros.h @@ -0,0 +1,22 @@ +//===-- Definition of macros from sys/stat.h ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_HDR_SYS_STAT_MACROS_H +#define LLVM_LIBC_HDR_SYS_STAT_MACROS_H + +#ifdef LIBC_FULL_BUILD + +#include "include/llvm-libc-macros/sys-stat-macros.h" + +#else // Overlay mode + +#include + +#endif // LLVM_LIBC_FULL_BUILD + +#endif // LLVM_LIBC_HDR_SYS_STAT_MACROS_H diff --git a/libc/hdr/unistd_macros.h b/libc/hdr/unistd_macros.h new file mode 100644 index 00000000000000..132e123280139f --- /dev/null +++ b/libc/hdr/unistd_macros.h @@ -0,0 +1,22 @@ +//===-- Definition of macros from unistd.h --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_HDR_UNISTD_MACROS_H +#define LLVM_LIBC_HDR_UNISTD_MACROS_H + +#ifdef LIBC_FULL_BUILD + +#include "include/llvm-libc-macros/unistd-macros.h" + +#else // Overlay mode + +#include + +#endif // LLVM_LIBC_FULL_BUILD + +#endif // LLVM_LIBC_HDR_UNISTD_MACROS_H diff --git a/libc/include/llvm-libc-macros/limits-macros.h b/libc/include/llvm-libc-macros/limits-macros.h index 3fab996b61ac9c..456487e603b254 100644 --- a/libc/include/llvm-libc-macros/limits-macros.h +++ b/libc/include/llvm-libc-macros/limits-macros.h @@ -225,4 +225,16 @@ #define ULLONG_MIN 0ULL #endif // ULLONG_MIN +#ifndef _POSIX_MAX_CANON +#define _POSIX_MAX_CANON 255 +#endif + +#ifndef _POSIX_MAX_INPUT +#define _POSIX_MAX_INPUT 255 +#endif + +#ifndef _POSIX_PATH_MAX +#define _POSIX_PATH_MAX 256 +#endif + #endif // LLVM_LIBC_MACROS_LIMITS_MACROS_H diff --git a/libc/include/llvm-libc-macros/linux/unistd-macros.h b/libc/include/llvm-libc-macros/linux/unistd-macros.h index c5109df435e68d..a4c8e3cd91f7e0 100644 --- 
a/libc/include/llvm-libc-macros/linux/unistd-macros.h +++ b/libc/include/llvm-libc-macros/linux/unistd-macros.h @@ -18,6 +18,33 @@ #define _SC_PAGESIZE 1 #define _SC_PAGE_SIZE _SC_PAGESIZE +#define _PC_FILESIZEBITS 0 +#define _PC_LINK_MAX 1 +#define _PC_MAX_CANON 2 +#define _PC_MAX_INPUT 3 +#define _PC_NAME_MAX 4 +#define _PC_PATH_MAX 5 +#define _PC_PIPE_BUF 6 +#define _PC_2_SYMLINKS 7 +#define _PC_ALLOC_SIZE_MIN 8 +#define _PC_REC_INCR_XFER_SIZE 9 +#define _PC_REC_MAX_XFER_SIZE 10 +#define _PC_REC_MIN_XFER_SIZE 11 +#define _PC_REC_XFER_ALIGN 12 +#define _PC_SYMLINK_MAX 13 +#define _PC_CHOWN_RESTRICTED 14 +#define _PC_NO_TRUNC 15 +#define _PC_VDISABLE 16 +#define _PC_ASYNC_IO 17 +#define _PC_PRIO_IO 18 +#define _PC_SYNC_IO 19 + +// TODO: Move these limit macros to a separate file +#define _POSIX_CHOWN_RESTRICTED 1 +#define _POSIX_PIPE_BUF 512 +#define _POSIX_NO_TRUNC 1 +#define _POSIX_VDISABLE '\0' + // Macro to set up the call to the __llvm_libc_syscall function // This is to prevent the call from having fewer than 6 arguments, since six // arguments are always passed to the syscall. 
Unnecessary arguments are diff --git a/libc/include/llvm-libc-macros/math-macros.h b/libc/include/llvm-libc-macros/math-macros.h index 47838969d59aed..efecdf16962a8e 100644 --- a/libc/include/llvm-libc-macros/math-macros.h +++ b/libc/include/llvm-libc-macros/math-macros.h @@ -41,12 +41,10 @@ #define FP_LLOGBNAN LONG_MAX #endif -#ifdef __FAST_MATH__ +#if defined(__NVPTX__) || defined(__AMDGPU__) || defined(__FAST_MATH__) #define math_errhandling 0 #elif defined(__NO_MATH_ERRNO__) #define math_errhandling (MATH_ERREXCEPT) -#elif defined(__NVPTX__) || defined(__AMDGPU__) -#define math_errhandling (MATH_ERRNO) #else #define math_errhandling (MATH_ERRNO | MATH_ERREXCEPT) #endif diff --git a/libc/spec/linux.td b/libc/spec/linux.td index 82630ff413c73d..395c2a6fe853a7 100644 --- a/libc/spec/linux.td +++ b/libc/spec/linux.td @@ -112,7 +112,19 @@ def Linux : StandardSpec<"Linux"> { [], // Macros [], // Types [], // Enumerations - [] // Functions + [ + FunctionSpec< + "prctl", + RetValSpec, + [ + ArgSpec, + ArgSpec, + ArgSpec, + ArgSpec, + ArgSpec, + ] + >, + ] // Functions >; HeaderSpec SysRandom = HeaderSpec< diff --git a/libc/spec/posix.td b/libc/spec/posix.td index d14047548e104f..1878b1ee2ae412 100644 --- a/libc/spec/posix.td +++ b/libc/spec/posix.td @@ -222,6 +222,40 @@ def POSIX : StandardSpec<"POSIX"> { [] // Functions >; + HeaderSpec DlFcn = HeaderSpec< + "dlfcn.h", + [ + Macro<"RTLD_LAZY">, + Macro<"RTLD_NOW">, + Macro<"RTLD_GLOBAL">, + Macro<"RTLD_LOCAL">, + ], + [], // Types + [], // Enumerations + [ + FunctionSpec< + "dlclose", + RetValSpec, + [ArgSpec] + >, + FunctionSpec< + "dlerror", + RetValSpec, + [] + >, + FunctionSpec< + "dlopen", + RetValSpec, + [ArgSpec, ArgSpec] + >, + FunctionSpec< + "dlsym", + RetValSpec, + [ArgSpec, ArgSpec] + >, + ] + >; + HeaderSpec FCntl = HeaderSpec< "fcntl.h", [], // Macros @@ -1690,6 +1724,7 @@ def POSIX : StandardSpec<"POSIX"> { ArpaInet, CType, Dirent, + DlFcn, Errno, FCntl, PThread, diff --git a/libc/spec/stdc.td 
b/libc/spec/stdc.td index feaa3fbfa66aa5..aa56152aee1413 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -382,6 +382,8 @@ def StdC : StandardSpec<"stdc"> { ], [], // Enumerations [ + FunctionSpec<"cbrtf", RetValSpec, [ArgSpec]>, + FunctionSpec<"copysign", RetValSpec, [ArgSpec, ArgSpec]>, FunctionSpec<"copysignf", RetValSpec, [ArgSpec, ArgSpec]>, FunctionSpec<"copysignl", RetValSpec, [ArgSpec, ArgSpec]>, diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt index 09b16be1e2d42e..9597e2380172b5 100644 --- a/libc/src/CMakeLists.txt +++ b/libc/src/CMakeLists.txt @@ -1,6 +1,7 @@ add_subdirectory(__support) add_subdirectory(ctype) +add_subdirectory(dlfcn) add_subdirectory(errno) add_subdirectory(fenv) add_subdirectory(inttypes) diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt index 84c5f802710c4a..793d3a121c7427 100644 --- a/libc/src/__support/FPUtil/CMakeLists.txt +++ b/libc/src/__support/FPUtil/CMakeLists.txt @@ -155,6 +155,8 @@ add_header_library( multiply_add.h DEPENDS libc.src.__support.common + FLAGS + FMA_OPT ) add_header_library( diff --git a/libc/src/__support/FPUtil/FEnvImpl.h b/libc/src/__support/FPUtil/FEnvImpl.h index 13e668becc651a..1674dd0358e883 100644 --- a/libc/src/__support/FPUtil/FEnvImpl.h +++ b/libc/src/__support/FPUtil/FEnvImpl.h @@ -67,6 +67,12 @@ LIBC_INLINE int set_env(const fenv_t *) { return 0; } namespace LIBC_NAMESPACE::fputil { +LIBC_INLINE int clear_except_if_required(int excepts) { + if (math_errhandling & MATH_ERREXCEPT) + return clear_except(excepts); + return 0; +} + LIBC_INLINE int set_except_if_required(int excepts) { if (math_errhandling & MATH_ERREXCEPT) return set_except(excepts); diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp index 58097d017a23f3..0b1a8cac21f77f 100644 --- a/libc/src/__support/File/file.cpp +++ b/libc/src/__support/File/file.cpp @@ -305,23 +305,21 @@ ErrorOr File::seek(long offset, int whence) { auto result = 
platform_seek(this, offset, whence); if (!result.has_value()) return Error(result.error()); - else - return 0; + return 0; } -ErrorOr File::tell() { +ErrorOr File::tell() { FileLock lock(this); auto seek_target = eof ? SEEK_END : SEEK_CUR; auto result = platform_seek(this, 0, seek_target); if (!result.has_value() || result.value() < 0) return Error(result.error()); - long platform_offset = result.value(); + off_t platform_offset = result.value(); if (prev_op == FileOp::READ) return platform_offset - (read_limit - pos); - else if (prev_op == FileOp::WRITE) + if (prev_op == FileOp::WRITE) return platform_offset + pos; - else - return platform_offset; + return platform_offset; } int File::flush_unlocked() { diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h index 06154871485ced..ec543ac1ac5f33 100644 --- a/libc/src/__support/File/file.h +++ b/libc/src/__support/File/file.h @@ -16,6 +16,7 @@ #include #include +#include // For off_t. namespace LIBC_NAMESPACE { @@ -45,7 +46,7 @@ class File { using ReadFunc = FileIOResult(File *, void *, size_t); // The SeekFunc is expected to return the current offset of the external // file position indicator. - using SeekFunc = ErrorOr(File *, long, int); + using SeekFunc = ErrorOr(File *, off_t, int); using CloseFunc = int(File *); using ModeFlags = uint32_t; @@ -182,7 +183,7 @@ class File { ErrorOr seek(long offset, int whence); - ErrorOr tell(); + ErrorOr tell(); // If buffer has data written to it, flush it out. Does nothing if the // buffer is currently being used as a read buffer. 
diff --git a/libc/src/__support/File/linux/CMakeLists.txt b/libc/src/__support/File/linux/CMakeLists.txt index ccf27f73d6aa8c..8436a687116bd9 100644 --- a/libc/src/__support/File/linux/CMakeLists.txt +++ b/libc/src/__support/File/linux/CMakeLists.txt @@ -1,3 +1,4 @@ +# TODO: migrate to proxy headers add_object_library( file SRCS @@ -8,6 +9,7 @@ add_object_library( libc.include.fcntl libc.include.stdio libc.include.sys_syscall + libc.include.sys_stat libc.src.__support.CPP.new libc.src.__support.OSUtil.osutil libc.src.errno.errno diff --git a/libc/src/__support/File/linux/file.cpp b/libc/src/__support/File/linux/file.cpp index 00ff93846c6bbb..0f6ed4f0a5ef49 100644 --- a/libc/src/__support/File/linux/file.cpp +++ b/libc/src/__support/File/linux/file.cpp @@ -42,7 +42,7 @@ FileIOResult linux_file_read(File *f, void *buf, size_t size) { return ret; } -ErrorOr linux_file_seek(File *f, long offset, int whence) { +ErrorOr linux_file_seek(File *f, off_t offset, int whence) { auto *lf = reinterpret_cast(f); auto result = internal::lseekimpl(lf->get_fd(), offset, whence); if (!result.has_value()) diff --git a/libc/src/__support/File/linux/file.h b/libc/src/__support/File/linux/file.h index 7d3770e1cdd713..63b820529932b8 100644 --- a/libc/src/__support/File/linux/file.h +++ b/libc/src/__support/File/linux/file.h @@ -12,7 +12,7 @@ namespace LIBC_NAMESPACE { FileIOResult linux_file_write(File *, const void *, size_t); FileIOResult linux_file_read(File *, void *, size_t); -ErrorOr linux_file_seek(File *, long, int); +ErrorOr linux_file_seek(File *, off_t, int); int linux_file_close(File *); class LinuxFile : public File { diff --git a/libc/src/__support/threads/sleep.h b/libc/src/__support/threads/sleep.h index 9a2dff598ece8b..6433bc3badd505 100644 --- a/libc/src/__support/threads/sleep.h +++ b/libc/src/__support/threads/sleep.h @@ -22,8 +22,10 @@ LIBC_INLINE void sleep_briefly() { __builtin_amdgcn_s_sleep(2); #elif defined(LIBC_TARGET_ARCH_IS_X86) __builtin_ia32_pause(); -#elif 
defined(LIBC_TARGET_ARCH_IS_AARCH64) +#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && __has_builtin(__builtin_arm_isb) __builtin_arm_isb(0xf); +#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) + asm volatile("isb\n" ::: "memory"); #else // Simply do nothing if sleeping isn't supported on this platform. #endif diff --git a/libc/src/dlfcn/CMakeLists.txt b/libc/src/dlfcn/CMakeLists.txt new file mode 100644 index 00000000000000..e3a51ba65764d4 --- /dev/null +++ b/libc/src/dlfcn/CMakeLists.txt @@ -0,0 +1,40 @@ +add_entrypoint_object( + dlclose + SRCS + dlclose.cpp + HDRS + dlclose.h +) + +add_entrypoint_object( + dlerror + SRCS + dlerror.cpp + HDRS + dlerror.h + DEPENDS + libc.include.dlfcn + libc.src.errno.errno +) + +add_entrypoint_object( + dlopen + SRCS + dlopen.cpp + HDRS + dlopen.h + DEPENDS + libc.include.dlfcn + libc.src.errno.errno +) + +add_entrypoint_object( + dlsym + SRCS + dlsym.cpp + HDRS + dlsym.h + DEPENDS + libc.include.dlfcn + libc.src.errno.errno +) diff --git a/libc/src/dlfcn/dlclose.cpp b/libc/src/dlfcn/dlclose.cpp new file mode 100644 index 00000000000000..1f1bfabd798010 --- /dev/null +++ b/libc/src/dlfcn/dlclose.cpp @@ -0,0 +1,18 @@ +//===-- Implementation of dlclose -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "dlclose.h" + +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +// TODO(@izaakschroeder): https://github.com/llvm/llvm-project/issues/97917 +LLVM_LIBC_FUNCTION(int, dlclose, (void *)) { return -1; } + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/dlfcn/dlclose.h b/libc/src/dlfcn/dlclose.h new file mode 100644 index 00000000000000..27c0207d726e49 --- /dev/null +++ b/libc/src/dlfcn/dlclose.h @@ -0,0 +1,18 @@ +//===-- Implementation header of dlclose ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_DLFCN_DLCLOSE_H +#define LLVM_LIBC_SRC_DLFCN_DLCLOSE_H + +namespace LIBC_NAMESPACE { + +int dlclose(void *); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_DLFCN_DLCLOSE_H diff --git a/libc/src/dlfcn/dlerror.cpp b/libc/src/dlfcn/dlerror.cpp new file mode 100644 index 00000000000000..711b5a39420b66 --- /dev/null +++ b/libc/src/dlfcn/dlerror.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of dlerror -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "dlerror.h" + +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +// TODO(@izaakschroeder): https://github.com/llvm/llvm-project/issues/97918 +LLVM_LIBC_FUNCTION(char *, dlerror, ()) { + return const_cast("unsupported"); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/dlfcn/dlerror.h b/libc/src/dlfcn/dlerror.h new file mode 100644 index 00000000000000..966496016d3ebd --- /dev/null +++ b/libc/src/dlfcn/dlerror.h @@ -0,0 +1,18 @@ +//===-- Implementation header of dlerror ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_DLFCN_DLERROR_H +#define LLVM_LIBC_SRC_DLFCN_DLERROR_H + +namespace LIBC_NAMESPACE { + +char *dlerror(); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_DLFCN_DLERROR_H diff --git a/libc/src/dlfcn/dlopen.cpp b/libc/src/dlfcn/dlopen.cpp new file mode 100644 index 00000000000000..9fa4d061c9c82a --- /dev/null +++ b/libc/src/dlfcn/dlopen.cpp @@ -0,0 +1,18 @@ +//===-- Implementation of dlopen -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "dlopen.h" + +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +// TODO(@izaakschroeder): https://github.com/llvm/llvm-project/issues/97919 +LLVM_LIBC_FUNCTION(void *, dlopen, (const char *, int)) { return nullptr; } + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/dlfcn/dlopen.h b/libc/src/dlfcn/dlopen.h new file mode 100644 index 00000000000000..4565953efd4943 --- /dev/null +++ b/libc/src/dlfcn/dlopen.h @@ -0,0 +1,18 @@ +//===-- Implementation header of dlopen -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_DLFCN_DLOPEN_H +#define LLVM_LIBC_SRC_DLFCN_DLOPEN_H + +namespace LIBC_NAMESPACE { + +void *dlopen(const char *, int); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_DLFCN_DLOPEN_H diff --git a/libc/src/dlfcn/dlsym.cpp b/libc/src/dlfcn/dlsym.cpp new file mode 100644 index 00000000000000..4c8dac698f61d7 --- /dev/null +++ b/libc/src/dlfcn/dlsym.cpp @@ -0,0 +1,18 @@ +//===-- Implementation of dlsym ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "dlsym.h" + +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +// TODO(@izaakschroeder): https://github.com/llvm/llvm-project/issues/97920 +LLVM_LIBC_FUNCTION(void *, dlsym, (void *, const char *)) { return nullptr; } + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/dlfcn/dlsym.h b/libc/src/dlfcn/dlsym.h new file mode 100644 index 00000000000000..8157ac3e3fd4ca --- /dev/null +++ b/libc/src/dlfcn/dlsym.h @@ -0,0 +1,18 @@ +//===-- Implementation header of dlsym --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_DLFCN_DLSYM_H +#define LLVM_LIBC_SRC_DLFCN_DLSYM_H + +namespace LIBC_NAMESPACE { + +void *dlsym(void *, const char *); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_DLFCN_DLSYM_H diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index e21011f37b53c4..74c2e4efda617e 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -65,6 +65,8 @@ add_math_entrypoint_object(canonicalizel) add_math_entrypoint_object(canonicalizef16) add_math_entrypoint_object(canonicalizef128) +add_math_entrypoint_object(cbrtf) + add_math_entrypoint_object(ceil) add_math_entrypoint_object(ceilf) add_math_entrypoint_object(ceill) diff --git a/libc/src/math/amdgpu/CMakeLists.txt b/libc/src/math/amdgpu/CMakeLists.txt index 93735a556a31bf..bc81f7b20a71db 100644 --- a/libc/src/math/amdgpu/CMakeLists.txt +++ b/libc/src/math/amdgpu/CMakeLists.txt @@ -286,122 +286,86 @@ add_entrypoint_object( -O2 ) -# The following functions currently are not implemented 
natively and borrow from -# existing implementations. This will be removed in the future. -add_entrypoint_object( - acos - SRCS - acos.cpp - HDRS - ../acos.h - VENDOR - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 -) - add_entrypoint_object( - acosf + frexp SRCS - acosf.cpp + frexp.cpp HDRS - ../acosf.h + ../frexp.h COMPILE_OPTIONS - ${bitcode_link_flags} -O2 - VENDOR ) add_entrypoint_object( - acosh + frexpf SRCS - acosh.cpp + frexpf.cpp HDRS - ../acosh.h + ../frexpf.h COMPILE_OPTIONS - ${bitcode_link_flags} -O2 - VENDOR ) add_entrypoint_object( - acoshf + scalbn SRCS - acoshf.cpp + scalbn.cpp HDRS - ../acoshf.h + ../scalbn.h COMPILE_OPTIONS - ${bitcode_link_flags} -O2 - VENDOR ) add_entrypoint_object( - asin + scalbnf SRCS - asin.cpp + scalbnf.cpp HDRS - ../asin.h + ../scalbnf.h COMPILE_OPTIONS - ${bitcode_link_flags} -O2 - VENDOR ) add_entrypoint_object( - asinf + ldexp SRCS - asinf.cpp + ldexp.cpp HDRS - ../asinf.h + ../ldexp.h COMPILE_OPTIONS - ${bitcode_link_flags} -O2 - VENDOR ) add_entrypoint_object( - asinh + ldexpf SRCS - asinh.cpp + ldexpf.cpp HDRS - ../asinh.h + ../ldexpf.h COMPILE_OPTIONS - ${bitcode_link_flags} -O2 - VENDOR ) +# The following functions currently are not implemented natively and borrow from +# existing implementations. This will be removed in the future. 
add_entrypoint_object( - atan + acos SRCS - atan.cpp + acos.cpp HDRS - ../atan.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 + ../acos.h VENDOR -) - -add_entrypoint_object( - atanf - SRCS - atanf.cpp - HDRS - ../atanf.h COMPILE_OPTIONS ${bitcode_link_flags} -O2 - VENDOR ) add_entrypoint_object( - atan2 + acosh SRCS - atan2.cpp + acosh.cpp HDRS - ../atan2.h + ../acosh.h COMPILE_OPTIONS ${bitcode_link_flags} -O2 @@ -409,11 +373,11 @@ add_entrypoint_object( ) add_entrypoint_object( - atan2f + asin SRCS - atan2f.cpp + asin.cpp HDRS - ../atan2f.h + ../asin.h COMPILE_OPTIONS ${bitcode_link_flags} -O2 @@ -421,11 +385,11 @@ add_entrypoint_object( ) add_entrypoint_object( - atanh + asinh SRCS - atanh.cpp + asinh.cpp HDRS - ../atanh.h + ../asinh.h COMPILE_OPTIONS ${bitcode_link_flags} -O2 @@ -433,11 +397,11 @@ add_entrypoint_object( ) add_entrypoint_object( - atanhf + atan SRCS - atanhf.cpp + atan.cpp HDRS - ../atanhf.h + ../atan.h COMPILE_OPTIONS ${bitcode_link_flags} -O2 @@ -445,11 +409,11 @@ add_entrypoint_object( ) add_entrypoint_object( - cos + atan2 SRCS - cos.cpp + atan2.cpp HDRS - ../cos.h + ../atan2.h COMPILE_OPTIONS ${bitcode_link_flags} -O2 @@ -457,11 +421,11 @@ add_entrypoint_object( ) add_entrypoint_object( - cosf + atanh SRCS - cosf.cpp + atanh.cpp HDRS - ../cosf.h + ../atanh.h COMPILE_OPTIONS ${bitcode_link_flags} -O2 @@ -480,18 +444,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - coshf - SRCS - coshf.cpp - HDRS - ../coshf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( erf SRCS @@ -504,258 +456,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - erff - SRCS - erff.cpp - HDRS - ../erff.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - exp - SRCS - exp.cpp - HDRS - ../exp.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - exp10 - SRCS - exp10.cpp - HDRS - ../exp10.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - 
VENDOR -) - -add_entrypoint_object( - exp10f - SRCS - exp10f.cpp - HDRS - ../exp10f.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - exp2 - SRCS - exp2.cpp - HDRS - ../exp2.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - exp2f - SRCS - exp2f.cpp - HDRS - ../exp2f.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - expf - SRCS - expf.cpp - HDRS - ../expf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - expm1 - SRCS - expm1.cpp - HDRS - ../expm1.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - expm1f - SRCS - expm1f.cpp - HDRS - ../expm1f.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - fdim - SRCS - fdim.cpp - HDRS - ../fdim.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - fdimf - SRCS - fdimf.cpp - HDRS - ../fdimf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - hypot - SRCS - hypot.cpp - HDRS - ../hypot.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - hypotf - SRCS - hypotf.cpp - HDRS - ../hypotf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - ilogb - SRCS - ilogb.cpp - HDRS - ../ilogb.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - ilogbf - SRCS - ilogbf.cpp - HDRS - ../ilogbf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - log10 - SRCS - log10.cpp - HDRS - ../log10.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - log10f - SRCS - log10f.cpp - HDRS - ../log10f.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - log2 - SRCS - log2.cpp - HDRS - ../log2.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - 
VENDOR -) - -add_entrypoint_object( - log2f - SRCS - log2f.cpp - HDRS - ../log2f.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - log - SRCS - log.cpp - HDRS - ../log.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - logf - SRCS - logf.cpp - HDRS - ../logf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( lrint SRCS @@ -780,54 +480,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - ldexp - SRCS - ldexp.cpp - HDRS - ../ldexp.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - ldexpf - SRCS - ldexpf.cpp - HDRS - ../ldexpf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - log1p - SRCS - log1p.cpp - HDRS - ../log1p.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - log1pf - SRCS - log1pf.cpp - HDRS - ../log1pf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( llrint SRCS @@ -852,79 +504,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - remquo - SRCS - remquo.cpp - HDRS - ../remquo.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - remquof - SRCS - remquof.cpp - HDRS - ../remquof.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - scalbn - SRCS - scalbn.cpp - HDRS - ../scalbn.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - scalbnf - SRCS - scalbnf.cpp - HDRS - ../scalbnf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - - -add_entrypoint_object( - nextafter - SRCS - nextafter.cpp - HDRS - ../nextafter.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - nextafterf - SRCS - nextafterf.cpp - HDRS - ../nextafterf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( pow SRCS @@ 
-937,66 +516,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - powf - SRCS - powf.cpp - HDRS - ../powf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - sin - SRCS - sin.cpp - HDRS - ../sin.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - sinf - SRCS - sinf.cpp - HDRS - ../sinf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - sincos - SRCS - sincos.cpp - HDRS - ../sincos.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - sincosf - SRCS - sincosf.cpp - HDRS - ../sincosf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( sinh SRCS @@ -1009,42 +528,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - sinhf - SRCS - sinhf.cpp - HDRS - ../sinhf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - tan - SRCS - tan.cpp - HDRS - ../tan.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - tanf - SRCS - tanf.cpp - HDRS - ../tanf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( tanh SRCS @@ -1057,18 +540,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - tanhf - SRCS - tanhf.cpp - HDRS - ../tanhf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( tgamma SRCS @@ -1092,27 +563,3 @@ add_entrypoint_object( -O2 VENDOR ) - -add_entrypoint_object( - frexp - SRCS - frexp.cpp - HDRS - ../frexp.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - frexpf - SRCS - frexpf.cpp - HDRS - ../frexpf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) diff --git a/libc/src/math/cbrtf.h b/libc/src/math/cbrtf.h new file mode 100644 index 00000000000000..74744594f29864 --- /dev/null +++ b/libc/src/math/cbrtf.h @@ -0,0 +1,18 @@ +//===-- Implementation header for cbrtf 
-------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_CBRTF_H +#define LLVM_LIBC_SRC_MATH_CBRTF_H + +namespace LIBC_NAMESPACE { + +float cbrtf(float x); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_MATH_CBRTF_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index fc7d6996af1e6c..e2bbdcfe5a15b8 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -4092,3 +4092,19 @@ add_entrypoint_object( COMPILE_OPTIONS -O3 ) + +add_entrypoint_object( + cbrtf + SRCS + cbrtf.cpp + HDRS + ../cbrtf.h + COMPILE_OPTIONS + -O3 + DEPENDS + libc.hdr.fenv_macros + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.macros.optimization +) diff --git a/libc/src/math/generic/cbrtf.cpp b/libc/src/math/generic/cbrtf.cpp new file mode 100644 index 00000000000000..a1eb58d463a065 --- /dev/null +++ b/libc/src/math/generic/cbrtf.cpp @@ -0,0 +1,157 @@ +//===-- Implementation of cbrtf function ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/cbrtf.h" +#include "hdr/fenv_macros.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/common.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY + +namespace LIBC_NAMESPACE { + +namespace { + +// Look up table for 2^(i/3) for i = 0, 1, 2. +constexpr double CBRT2[3] = {1.0, 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0}; + +// Degree-7 polynomials approximation of ((1 + x)^(1/3) - 1)/x for 0 <= x <= 1 +// generated by Sollya with: +// > for i from 0 to 15 do { +// P = fpminimax((1 + x)^(1/3) - 1)/x, 6, [|D...|], [i/16, (i + 1)/16]); +// print("{", coeff(P, 0), ",", coeff(P, 1), ",", coeff(P, 2), ",", +// coeff(P, 3), ",", coeff(P, 4), ",", coeff(P, 5), ",", +// coeff(P, 6), "},"); +// }; +// Then (1 + x)^(1/3) ~ 1 + x * P(x). 
+constexpr double COEFFS[16][7] = { + {0x1.55555555554ebp-2, -0x1.c71c71c678c0cp-4, 0x1.f9add2776de81p-5, + -0x1.511e10aa964a7p-5, 0x1.ee44165937fa2p-6, -0x1.7c5c9e059345dp-6, + 0x1.047f75e0aff14p-6}, + {0x1.5555554d1149ap-2, -0x1.c71c676fcb5bp-4, 0x1.f9ab127dc57ebp-5, + -0x1.50ea8fd1d4c15p-5, 0x1.e9d68f28ced43p-6, -0x1.60e0e1e661311p-6, + 0x1.716eca1d6e3bcp-7}, + {0x1.5555546377d45p-2, -0x1.c71bc1c6d49d2p-4, 0x1.f9924cc0ed24dp-5, + -0x1.4fea3beb53b3bp-5, 0x1.de028a9a07b1bp-6, -0x1.3b090d2233524p-6, + 0x1.0aeca34893785p-7}, + {0x1.55554dce9f649p-2, -0x1.c7188b34b98f8p-4, 0x1.f93e1af34af49p-5, + -0x1.4d9a06be75c63p-5, 0x1.cb943f4f68992p-6, -0x1.139a685a5e3c4p-6, + 0x1.88410674c6a5dp-8}, + {0x1.5555347d211c3p-2, -0x1.c70f2a4b1a5fap-4, 0x1.f88420e8602c3p-5, + -0x1.49becfa4ed3ep-5, 0x1.b475cd9013162p-6, -0x1.dcfee1dd2f8efp-7, + 0x1.249bb51a1c498p-8}, + {0x1.5554f01b33dbap-2, -0x1.c6facb929dbf1p-4, 0x1.f73fb7861252ep-5, + -0x1.4459a4a0071fap-5, 0x1.9a8df2b504fc2p-6, -0x1.9a7ce3006d06ep-7, + 0x1.ba9230918fa2ep-9}, + {0x1.55545c695db5fp-2, -0x1.c6d6089f20275p-4, 0x1.f556e0ea80efp-5, + -0x1.3d91372d083f4p-5, 0x1.7f66cff331f4p-6, -0x1.606a562491737p-7, + 0x1.52e3e17c71069p-9}, + {0x1.55534a879232ap-2, -0x1.c69b836998b84p-4, 0x1.f2bb26dac0e4cp-5, + -0x1.359eed43716d7p-5, 0x1.64218cd824fbcp-6, -0x1.2e703e2e091e8p-7, + 0x1.0677d9af6aad4p-9}, + {0x1.5551836bb5494p-2, -0x1.c64658c15353bp-4, 0x1.ef68517451a6ep-5, + -0x1.2cc20a980dceep-5, 0x1.49843e0fad93ap-6, -0x1.03c59ccb68e54p-7, + 0x1.9ad325dc7adcbp-10}, + {0x1.554ecacb0d035p-2, -0x1.c5d2664026ffcp-4, 0x1.eb624796ba809p-5, + -0x1.233803d19a535p-5, 0x1.300decb1c3c28p-6, -0x1.befe18031ec3dp-8, + 0x1.449f5ee175c69p-10}, + {0x1.554ae1f5ae815p-2, -0x1.c53c6b14ff6b2p-4, 0x1.e6b2d5127bb5bp-5, + -0x1.19387336788a3p-5, 0x1.180955a6ab255p-6, -0x1.81696703ba369p-8, + 0x1.02cb36389bd79p-10}, + {0x1.55458a59f356ep-2, -0x1.c4820dd631ae9p-4, 0x1.e167af818bd15p-5, + -0x1.0ef35f6f72e52p-5, 0x1.019c33b65e4ebp-6, -0x1.4d25bdd52d3a5p-8, + 
0x1.a008ae91f5936p-11}, + {0x1.553e878eafee1p-2, -0x1.c3a1d0b2a3db2p-4, 0x1.db90d8ed9f89bp-5, + -0x1.0490e20f1ae91p-5, 0x1.d9a5d1fc42fe3p-7, -0x1.20bf8227c2abfp-8, + 0x1.50f8174cdb6e9p-11}, + {0x1.5535a0dedf1b1p-2, -0x1.c29afb8bd01a1p-4, 0x1.d53f6371c1e27p-5, + -0x1.f463209b433e2p-6, 0x1.b35222a17e44p-7, -0x1.f5efbf505e133p-9, + 0x1.12e0e94e8586dp-11}, + {0x1.552aa25e57bfdp-2, -0x1.c16d811e4acadp-4, 0x1.ce8489b47aa51p-5, + -0x1.dfde7ff758ea8p-6, 0x1.901f43aac38c8p-7, -0x1.b581d07df5ad5p-9, + 0x1.c3726535f1fc6p-12}, + {0x1.551d5d9b204d3p-2, -0x1.c019e328f8db1p-4, 0x1.c7710f44fc3cep-5, + -0x1.cbbbe25ea8ba4p-6, 0x1.6fe270088623dp-7, -0x1.7e6fc79733761p-9, + 0x1.75077abf18d84p-12}, +}; + +} // anonymous namespace + +LLVM_LIBC_FUNCTION(float, cbrtf, (float x)) { + using FloatBits = typename fputil::FPBits; + using DoubleBits = typename fputil::FPBits; + + FloatBits x_bits(x); + + uint32_t x_abs = x_bits.uintval() & 0x7fff'ffff; + uint32_t sign_bit = (x_bits.uintval() >> 31) << DoubleBits::EXP_LEN; + + if (LIBC_UNLIKELY(x_abs == 0 || x_abs >= 0x7f80'0000)) { + // x is 0, Inf, or NaN. + return x; + } + + double xd = static_cast(x); + DoubleBits xd_bits(xd); + + // When using biased exponent of x in double precision, + // x_e = real_exponent_of_x + 1023 + // Then: + // x_e / 3 = real_exponent_of_x / 3 + 1023/3 + // = real_exponent_of_x / 3 + 341 + // So to make it the correct biased exponent of x^(1/3), we add + // 1023 - 341 = 682 + // to the quotient x_e / 3. 
+ unsigned x_e = static_cast(xd_bits.get_biased_exponent()); + unsigned out_e = (x_e / 3 + 682) | sign_bit; + unsigned shift_e = x_e % 3; + + // Set x_m = 2^(x_e % 3) * (1.mantissa) + uint64_t x_m = xd_bits.get_mantissa(); + // Use the leading 4 bits for look up table + unsigned idx = static_cast(x_m >> (DoubleBits::FRACTION_LEN - 4)); + + x_m |= static_cast(DoubleBits::EXP_BIAS) + << DoubleBits::FRACTION_LEN; + + double x_reduced = DoubleBits(x_m).get_val(); + double dx = x_reduced - 1.0; + + double dx_sq = dx * dx; + double c0 = fputil::multiply_add(dx, COEFFS[idx][0], 1.0); + double c1 = fputil::multiply_add(dx, COEFFS[idx][2], COEFFS[idx][1]); + double c2 = fputil::multiply_add(dx, COEFFS[idx][4], COEFFS[idx][3]); + double c3 = fputil::multiply_add(dx, COEFFS[idx][6], COEFFS[idx][5]); + + double dx_4 = dx_sq * dx_sq; + double p0 = fputil::multiply_add(dx_sq, c1, c0); + double p1 = fputil::multiply_add(dx_sq, c3, c2); + + double r = fputil::multiply_add(dx_4, p1, p0) * CBRT2[shift_e]; + + uint64_t r_m = DoubleBits(r).get_mantissa(); + // Check if the output is exact. To be exact, the smallest 1-bit of the + // output has to be at least 2^-7 or higher. So we check the lowest 44 bits + // to see if they are within 2^(-52 + 3) errors from all zeros, then the + // result cube root is exact. + if (LIBC_UNLIKELY(((r_m + 8) & 0xfffffffffff) <= 16)) { + if ((r_m & 0xfffffffffff) <= 8) + r_m &= 0xffff'ffff'ffff'ffe0; + else + r_m = (r_m & 0xffff'ffff'ffff'ffe0) + 0x20; + fputil::clear_except_if_required(FE_INEXACT); + } + // Adjust exponent and sign. 
+ uint64_t r_bits = + r_m | (static_cast(out_e) << DoubleBits::FRACTION_LEN); + + return static_cast(DoubleBits(r_bits).get_val()); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/cos.cpp b/libc/src/math/generic/cos.cpp index a2cfe758fe4cc0..0eb6d9d6a6de85 100644 --- a/libc/src/math/generic/cos.cpp +++ b/libc/src/math/generic/cos.cpp @@ -61,7 +61,7 @@ LLVM_LIBC_FUNCTION(double, cos, (double x)) { DoubleDouble y; unsigned k; - generic::LargeRangeReduction range_reduction_large; + generic::LargeRangeReduction range_reduction_large{}; // |x| < 2^32 (with FMA) or |x| < 2^23 (w/o FMA) if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) { diff --git a/libc/src/math/generic/sin.cpp b/libc/src/math/generic/sin.cpp index 207435d4385ae1..e7a43245408bfd 100644 --- a/libc/src/math/generic/sin.cpp +++ b/libc/src/math/generic/sin.cpp @@ -62,7 +62,7 @@ LLVM_LIBC_FUNCTION(double, sin, (double x)) { DoubleDouble y; unsigned k; - generic::LargeRangeReduction range_reduction_large; + generic::LargeRangeReduction range_reduction_large{}; // |x| < 2^32 (with FMA) or |x| < 2^23 (w/o FMA) if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) { diff --git a/libc/src/math/generic/sincos.cpp b/libc/src/math/generic/sincos.cpp index a0dd3a018af59c..ed70e380b72e8a 100644 --- a/libc/src/math/generic/sincos.cpp +++ b/libc/src/math/generic/sincos.cpp @@ -63,7 +63,7 @@ LLVM_LIBC_FUNCTION(void, sincos, (double x, double *sin_x, double *cos_x)) { DoubleDouble y; unsigned k; - generic::LargeRangeReduction range_reduction_large; + generic::LargeRangeReduction range_reduction_large{}; // |x| < 2^32 (with FMA) or |x| < 2^23 (w/o FMA) if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) { diff --git a/libc/src/math/generic/tan.cpp b/libc/src/math/generic/tan.cpp index e6230e9c1cd69c..d153cc53acc807 100644 --- a/libc/src/math/generic/tan.cpp +++ b/libc/src/math/generic/tan.cpp @@ -95,7 +95,7 @@ LIBC_INLINE DoubleDouble tan_eval(const DoubleDouble &u) { } 
// Accurate evaluation of tan for small u. -Float128 tan_eval(const Float128 &u) { +[[maybe_unused]] Float128 tan_eval(const Float128 &u) { Float128 u_sq = fputil::quick_mul(u, u); // tan(x) ~ x + x^3/3 + x^5 * 2/15 + x^7 * 17/315 + x^9 * 62/2835 + @@ -127,7 +127,8 @@ Float128 tan_eval(const Float128 &u) { // Calculation a / b = a * (1/b) for Float128. // Using the initial approximation of q ~ (1/b), then apply 2 Newton-Raphson // iterations, before multiplying by a. -Float128 newton_raphson_div(const Float128 &a, Float128 b, double q) { +[[maybe_unused]] Float128 newton_raphson_div(const Float128 &a, Float128 b, + double q) { Float128 q0(q); constexpr Float128 TWO(2.0); b.sign = (b.sign == Sign::POS) ? Sign::NEG : Sign::POS; @@ -158,7 +159,7 @@ LLVM_LIBC_FUNCTION(double, tan, (double x)) { if (LIBC_UNLIKELY(x == 0.0)) return x; - // For |x| < 2^-27, |tan(x) - x| < ulp(x)/2. + // For |x| < 2^-27, |tan(x) - x| < ulp(x)/2. #ifdef LIBC_TARGET_CPU_HAS_FMA return fputil::multiply_add(x, 0x1.0p-54, x); #else diff --git a/libc/src/math/nvptx/CMakeLists.txt b/libc/src/math/nvptx/CMakeLists.txt index 581e1c6a3044b2..a09668ca10678d 100644 --- a/libc/src/math/nvptx/CMakeLists.txt +++ b/libc/src/math/nvptx/CMakeLists.txt @@ -301,18 +301,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - acosf - SRCS - acosf.cpp - HDRS - ../acosf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( acosh SRCS @@ -325,18 +313,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - acoshf - SRCS - acoshf.cpp - HDRS - ../acoshf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( asin SRCS @@ -349,18 +325,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - asinf - SRCS - asinf.cpp - HDRS - ../asinf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( asinh SRCS @@ -385,18 +349,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - atanf - SRCS - atanf.cpp - 
HDRS - ../atanf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( atan2 SRCS @@ -409,18 +361,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - atan2f - SRCS - atan2f.cpp - HDRS - ../atan2f.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( atanh SRCS @@ -433,18 +373,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - atanhf - SRCS - atanhf.cpp - HDRS - ../atanhf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( cos SRCS @@ -457,18 +385,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - cosf - SRCS - cosf.cpp - HDRS - ../cosf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( cosh SRCS @@ -481,18 +397,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - coshf - SRCS - coshf.cpp - HDRS - ../coshf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( erf SRCS @@ -505,258 +409,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - erff - SRCS - erff.cpp - HDRS - ../erff.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - exp - SRCS - exp.cpp - HDRS - ../exp.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - exp10 - SRCS - exp10.cpp - HDRS - ../exp10.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - exp10f - SRCS - exp10f.cpp - HDRS - ../exp10f.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - exp2 - SRCS - exp2.cpp - HDRS - ../exp2.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - exp2f - SRCS - exp2f.cpp - HDRS - ../exp2f.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - expf - SRCS - expf.cpp - HDRS - ../expf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - expm1 - 
SRCS - expm1.cpp - HDRS - ../expm1.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - expm1f - SRCS - expm1f.cpp - HDRS - ../expm1f.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - fdim - SRCS - fdim.cpp - HDRS - ../fdim.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - fdimf - SRCS - fdimf.cpp - HDRS - ../fdimf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - hypot - SRCS - hypot.cpp - HDRS - ../hypot.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - hypotf - SRCS - hypotf.cpp - HDRS - ../hypotf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - ilogb - SRCS - ilogb.cpp - HDRS - ../ilogb.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - ilogbf - SRCS - ilogbf.cpp - HDRS - ../ilogbf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - log10 - SRCS - log10.cpp - HDRS - ../log10.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - log10f - SRCS - log10f.cpp - HDRS - ../log10f.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - log2 - SRCS - log2.cpp - HDRS - ../log2.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - log2f - SRCS - log2f.cpp - HDRS - ../log2f.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - log - SRCS - log.cpp - HDRS - ../log.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - logf - SRCS - logf.cpp - HDRS - ../logf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( lrint SRCS @@ -781,54 +433,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - ldexp - SRCS - ldexp.cpp - HDRS - ../ldexp.h - 
COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - ldexpf - SRCS - ldexpf.cpp - HDRS - ../ldexpf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - log1p - SRCS - log1p.cpp - HDRS - ../log1p.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - log1pf - SRCS - log1pf.cpp - HDRS - ../log1pf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( llrint SRCS @@ -853,79 +457,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - remquo - SRCS - remquo.cpp - HDRS - ../remquo.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - remquof - SRCS - remquof.cpp - HDRS - ../remquof.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - scalbn - SRCS - scalbn.cpp - HDRS - ../scalbn.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - scalbnf - SRCS - scalbnf.cpp - HDRS - ../scalbnf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - - -add_entrypoint_object( - nextafter - SRCS - nextafter.cpp - HDRS - ../nextafter.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - nextafterf - SRCS - nextafterf.cpp - HDRS - ../nextafterf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( pow SRCS @@ -938,66 +469,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - powf - SRCS - powf.cpp - HDRS - ../powf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - sin - SRCS - sin.cpp - HDRS - ../sin.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - sinf - SRCS - sinf.cpp - HDRS - ../sinf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - sincos - SRCS - sincos.cpp - HDRS - ../sincos.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - 
VENDOR -) - -add_entrypoint_object( - sincosf - SRCS - sincosf.cpp - HDRS - ../sincosf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( sinh SRCS @@ -1010,42 +481,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - sinhf - SRCS - sinhf.cpp - HDRS - ../sinhf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - tan - SRCS - tan.cpp - HDRS - ../tan.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - tanf - SRCS - tanf.cpp - HDRS - ../tanf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( tanh SRCS @@ -1058,18 +493,6 @@ add_entrypoint_object( VENDOR ) -add_entrypoint_object( - tanhf - SRCS - tanhf.cpp - HDRS - ../tanhf.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - add_entrypoint_object( tgamma SRCS @@ -1093,27 +516,3 @@ add_entrypoint_object( -O2 VENDOR ) - -add_entrypoint_object( - frexp - SRCS - frexp.cpp - HDRS - ../frexp.h - COMPILE_OPTIONS - ${bitcode_link_flags} - -O2 - VENDOR -) - -add_entrypoint_object( - frexpf - SRCS - frexpf.cpp - HDRS - ../frexpf.h - COMPILE_OPTIONS - ${itcode_link_flags} - -O2 - VENDOR -) diff --git a/libc/src/math/nvptx/llrint.cpp b/libc/src/math/nvptx/llrint.cpp index 8f95e75e779b51..307420a9b8b261 100644 --- a/libc/src/math/nvptx/llrint.cpp +++ b/libc/src/math/nvptx/llrint.cpp @@ -13,6 +13,8 @@ namespace LIBC_NAMESPACE { -LLVM_LIBC_FUNCTION(long long, llrint, (double x)) { return __nv_llrint(x); } +LLVM_LIBC_FUNCTION(long long, llrint, (double x)) { + return static_cast(__builtin_rint(x)); +} } // namespace LIBC_NAMESPACE diff --git a/libc/src/math/nvptx/llrintf.cpp b/libc/src/math/nvptx/llrintf.cpp index 1432ffbd1bdad3..23404990fb1bdf 100644 --- a/libc/src/math/nvptx/llrintf.cpp +++ b/libc/src/math/nvptx/llrintf.cpp @@ -13,6 +13,8 @@ namespace LIBC_NAMESPACE { -LLVM_LIBC_FUNCTION(long long, llrintf, (float x)) { return __nv_llrintf(x); } 
+LLVM_LIBC_FUNCTION(long long, llrintf, (float x)) { + return static_cast(__builtin_rintf(x)); +} } // namespace LIBC_NAMESPACE diff --git a/libc/src/math/nvptx/lrint.cpp b/libc/src/math/nvptx/lrint.cpp index 8585f4ce53a4d1..b335b4f06393ca 100644 --- a/libc/src/math/nvptx/lrint.cpp +++ b/libc/src/math/nvptx/lrint.cpp @@ -13,6 +13,8 @@ namespace LIBC_NAMESPACE { -LLVM_LIBC_FUNCTION(long, lrint, (double x)) { return __nv_lrint(x); } +LLVM_LIBC_FUNCTION(long, lrint, (double x)) { + return static_cast(__builtin_rint(x)); +} } // namespace LIBC_NAMESPACE diff --git a/libc/src/search/hsearch.cpp b/libc/src/search/hsearch.cpp index 5aeb5c29449e1e..a30803c5a0de72 100644 --- a/libc/src/search/hsearch.cpp +++ b/libc/src/search/hsearch.cpp @@ -14,7 +14,7 @@ namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(ENTRY *, hsearch, (ENTRY item, ACTION action)) { - ENTRY *result; + ENTRY *result = nullptr; if (internal::global_hash_table == nullptr) { // If global_hash_table is null, we create a new hash table with a minimal // capacity. Such hashtable will be expanded as needed. 
diff --git a/libc/src/stdio/fopencookie.cpp b/libc/src/stdio/fopencookie.cpp index 2cb7ad2f46ebf6..17b8ae199d3db3 100644 --- a/libc/src/stdio/fopencookie.cpp +++ b/libc/src/stdio/fopencookie.cpp @@ -24,7 +24,7 @@ class CookieFile : public LIBC_NAMESPACE::File { static FileIOResult cookie_write(File *f, const void *data, size_t size); static FileIOResult cookie_read(File *f, void *data, size_t size); - static ErrorOr cookie_seek(File *f, long offset, int whence); + static ErrorOr cookie_seek(File *f, off_t offset, int whence); static int cookie_close(File *f); public: @@ -52,7 +52,7 @@ FileIOResult CookieFile::cookie_read(File *f, void *data, size_t size) { reinterpret_cast(data), size); } -ErrorOr CookieFile::cookie_seek(File *f, long offset, int whence) { +ErrorOr CookieFile::cookie_seek(File *f, off_t offset, int whence) { auto cookie_file = reinterpret_cast(f); if (cookie_file->ops.seek == nullptr) { return Error(EINVAL); @@ -61,8 +61,7 @@ ErrorOr CookieFile::cookie_seek(File *f, long offset, int whence) { int result = cookie_file->ops.seek(cookie_file->cookie, &offset64, whence); if (result == 0) return offset64; - else - return -1; + return -1; } int CookieFile::cookie_close(File *f) { diff --git a/libc/src/stdio/generic/ftell.cpp b/libc/src/stdio/generic/ftell.cpp index d55bad2828541b..16487cabf63800 100644 --- a/libc/src/stdio/generic/ftell.cpp +++ b/libc/src/stdio/generic/ftell.cpp @@ -19,7 +19,11 @@ LLVM_LIBC_FUNCTION(long, ftell, (::FILE * stream)) { libc_errno = result.error(); return -1; } - return result.value(); + // tell() returns an off_t (64-bit signed integer), but this function returns + // a long (32-bit signed integer in 32-bit systems). We add a cast here to + // silence a "implicit conversion loses integer precision" warning when + // compiling for 32-bit systems. 
+ return static_cast(result.value()); } } // namespace LIBC_NAMESPACE diff --git a/libc/src/stdio/scanf_core/CMakeLists.txt b/libc/src/stdio/scanf_core/CMakeLists.txt index b3445300059fa9..e2b49e0c915284 100644 --- a/libc/src/stdio/scanf_core/CMakeLists.txt +++ b/libc/src/stdio/scanf_core/CMakeLists.txt @@ -1,8 +1,26 @@ +if(LIBC_CONF_SCANF_DISABLE_FLOAT) + list(APPEND scanf_config_copts "-DLIBC_COPT_SCANF_DISABLE_FLOAT") +endif() +if(LIBC_CONF_SCANF_DISABLE_INDEX_MODE) + list(APPEND scanf_config_copts "-DLIBC_COPT_SCANF_DISABLE_INDEX_MODE") +endif() +if(scanf_config_copts) + list(PREPEND scanf_config_copts "COMPILE_OPTIONS") +endif() + +add_header_library( + scanf_config + HDRS + scanf_config.h + ${scanf_config_copts} +) + add_header_library( core_structs HDRS core_structs.h DEPENDS + .scanf_config libc.src.__support.CPP.string_view libc.src.__support.CPP.bitset libc.src.__support.FPUtil.fp_bits diff --git a/libc/src/sys/mman/linux/mmap.cpp b/libc/src/sys/mman/linux/mmap.cpp index 16111c66859f5e..2aa7003f342a96 100644 --- a/libc/src/sys/mman/linux/mmap.cpp +++ b/libc/src/sys/mman/linux/mmap.cpp @@ -39,9 +39,12 @@ LLVM_LIBC_FUNCTION(void *, mmap, #error "mmap or mmap2 syscalls not available." #endif + // We add an explicit cast to silence a "implicit conversion loses integer + // precision" warning when compiling for 32-bit systems. + long mmap_offset = static_cast(offset); long ret = LIBC_NAMESPACE::syscall_impl(syscall_number, reinterpret_cast(addr), - size, prot, flags, fd, offset); + size, prot, flags, fd, mmap_offset); // The mmap/mmap2 syscalls return negative values on error. These negative // values are actually the negative values of the error codes. 
So, fix them diff --git a/libc/src/sys/statvfs/linux/CMakeLists.txt b/libc/src/sys/statvfs/linux/CMakeLists.txt index f818863bb4707f..a6660c02badf78 100644 --- a/libc/src/sys/statvfs/linux/CMakeLists.txt +++ b/libc/src/sys/statvfs/linux/CMakeLists.txt @@ -8,6 +8,7 @@ add_header_library( libc.src.__support.common libc.src.__support.CPP.optional libc.include.sys_syscall + libc.include.llvm-libc-types.struct_statvfs ) add_entrypoint_object( diff --git a/libc/src/unistd/CMakeLists.txt b/libc/src/unistd/CMakeLists.txt index 77db76518350c7..ddafcd7c92f210 100644 --- a/libc/src/unistd/CMakeLists.txt +++ b/libc/src/unistd/CMakeLists.txt @@ -69,6 +69,13 @@ add_entrypoint_object( .${LIBC_TARGET_OS}.fork ) +add_entrypoint_object( + fpathconf + ALIAS + DEPENDS + .${LIBC_TARGET_OS}.fpathconf +) + add_entrypoint_object( execv ALIAS @@ -160,6 +167,14 @@ add_entrypoint_object( .${LIBC_TARGET_OS}.lseek ) +add_entrypoint_object( + pathconf + ALIAS + DEPENDS + .${LIBC_TARGET_OS}.pathconf +) + + add_entrypoint_object( pipe ALIAS diff --git a/libc/src/unistd/fpathconf.h b/libc/src/unistd/fpathconf.h new file mode 100644 index 00000000000000..f3182c8123618b --- /dev/null +++ b/libc/src/unistd/fpathconf.h @@ -0,0 +1,18 @@ +//===-- Implementation header for fpathconf ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_UNISTD_FPATHCONF_H +#define LLVM_LIBC_SRC_UNISTD_FPATHCONF_H + +namespace LIBC_NAMESPACE { + +long fpathconf(int fd, int name); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_UNISTD_FSYNC_H diff --git a/libc/src/unistd/linux/CMakeLists.txt b/libc/src/unistd/linux/CMakeLists.txt index 7d831f9c29c74f..7e733d7f002c32 100644 --- a/libc/src/unistd/linux/CMakeLists.txt +++ b/libc/src/unistd/linux/CMakeLists.txt @@ -105,6 +105,20 @@ add_entrypoint_object( libc.src.errno.errno ) +add_entrypoint_object( + fpathconf + SRCS + fpathconf.cpp + HDRS + ../fpathconf.h + DEPENDS + libc.include.unistd + libc.include.sys_syscall + libc.src.__support.OSUtil.osutil + libc.src.errno.errno + libc.src.unistd.linux.pathconf_utils +) + add_entrypoint_object( execv SRCS @@ -273,6 +287,34 @@ add_entrypoint_object( libc.src.errno.errno ) +add_entrypoint_object( + pathconf + SRCS + pathconf.cpp + HDRS + ../pathconf.h + DEPENDS + libc.include.unistd + libc.include.sys_syscall + libc.src.__support.OSUtil.osutil + libc.src.errno.errno + libc.src.unistd.linux.pathconf_utils +) + +add_object_library( + pathconf_utils + SRCS + pathconf_utils.cpp + HDRS + pathconf_utils.h + DEPENDS + libc.hdr.limits_macros + libc.hdr.unistd_macros + libc.src.__support.OSUtil.osutil + libc.src.errno.errno + libc.src.sys.statvfs.linux.statfs_utils +) + add_entrypoint_object( pipe SRCS diff --git a/libc/src/unistd/linux/fpathconf.cpp b/libc/src/unistd/linux/fpathconf.cpp new file mode 100644 index 00000000000000..6643ed1cc9e882 --- /dev/null +++ b/libc/src/unistd/linux/fpathconf.cpp @@ -0,0 +1,24 @@ +//===-- Linux implementation of fpathconf ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/unistd/fpathconf.h" +#include "src/__support/OSUtil/syscall.h" // For internal syscall function. +#include "src/__support/common.h" +#include "src/sys/statvfs/linux/statfs_utils.h" +#include "src/unistd/linux/pathconf_utils.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(long, fpathconf, (int fd, int name)) { + if (cpp::optional result = + statfs_utils::linux_fstatfs(fd)) + return pathconfig(result.value(), name); + return -1; +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/unistd/linux/pathconf.cpp b/libc/src/unistd/linux/pathconf.cpp new file mode 100644 index 00000000000000..11427698232b5b --- /dev/null +++ b/libc/src/unistd/linux/pathconf.cpp @@ -0,0 +1,23 @@ +//===-- Linux implementation of pathconf ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/unistd/pathconf.h" +#include "src/errno/libc_errno.h" +#include "src/sys/statvfs/linux/statfs_utils.h" +#include "src/unistd/linux/pathconf_utils.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(long, pathconf, (const char *path, int name)) { + if (cpp::optional result = + statfs_utils::linux_statfs(path)) + return pathconfig(result.value(), name); + return -1; +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/unistd/linux/pathconf_utils.cpp b/libc/src/unistd/linux/pathconf_utils.cpp new file mode 100644 index 00000000000000..3f963ab5aaaf71 --- /dev/null +++ b/libc/src/unistd/linux/pathconf_utils.cpp @@ -0,0 +1,127 @@ +//===-- Linux implementation of pathconf_utils ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// This header must go before limits_macros.h otherwise libc header may choose +// to undefine LINK_MAX. +#include // For LINK_MAX and other limits + +#include "hdr/limits_macros.h" +#include "hdr/unistd_macros.h" +#include "src/__support/OSUtil/syscall.h" // For internal syscall function. 
+#include "src/__support/common.h"
+#include "src/errno/libc_errno.h"
+#include "src/sys/statvfs/linux/statfs_utils.h"
+
+// other linux specific includes
+#include <linux/fs.h>
+#if __has_include(<linux/ufs_fs.h>)
+#include <linux/ufs_fs.h>
+#else
+// from https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/
+#define UFS_MAGIC 0x00011954
+#endif
+#include <linux/magic.h> // For common FS magics
+
+namespace LIBC_NAMESPACE {
+
+long filesizebits(const statfs_utils::LinuxStatFs &s) {
+  switch (s.f_type) {
+  case JFFS2_SUPER_MAGIC:
+  case MSDOS_SUPER_MAGIC:
+  case NCP_SUPER_MAGIC:
+    return 32;
+  }
+  return 64;
+}
+
+long link_max(const statfs_utils::LinuxStatFs &s) {
+  switch (s.f_type) {
+  case EXT2_SUPER_MAGIC:
+    return 32000;
+  case MINIX_SUPER_MAGIC:
+    return 250;
+  case MINIX2_SUPER_MAGIC:
+    return 65530;
+  case REISERFS_SUPER_MAGIC:
+    return 0xffff - 1000;
+  case UFS_MAGIC:
+    return 32000;
+  }
+  return LINK_MAX;
+}
+
+long symlinks(const statfs_utils::LinuxStatFs &s) {
+  switch (s.f_type) {
+  case ADFS_SUPER_MAGIC:
+  case BFS_MAGIC:
+  case CRAMFS_MAGIC:
+  case EFS_SUPER_MAGIC:
+  case MSDOS_SUPER_MAGIC:
+  case QNX4_SUPER_MAGIC:
+    return 0;
+  }
+  return 1;
+}
+
+long pathconfig(const statfs_utils::LinuxStatFs &s, int name) {
+  switch (name) {
+  case _PC_LINK_MAX:
+    return link_max(s);
+
+  case _PC_FILESIZEBITS:
+    return filesizebits(s);
+
+  case _PC_2_SYMLINKS:
+    return symlinks(s);
+
+  case _PC_REC_MIN_XFER_SIZE:
+    return s.f_bsize;
+
+  case _PC_ALLOC_SIZE_MIN:
+  case _PC_REC_XFER_ALIGN:
+    return s.f_frsize;
+
+  case _PC_MAX_CANON:
+    return _POSIX_MAX_CANON;
+
+  case _PC_MAX_INPUT:
+    return _POSIX_MAX_INPUT;
+
+  case _PC_NAME_MAX:
+    return s.f_namelen;
+
+  case _PC_PATH_MAX:
+    return _POSIX_PATH_MAX;
+
+  case _PC_PIPE_BUF:
+    return _POSIX_PIPE_BUF;
+
+  case _PC_CHOWN_RESTRICTED:
+    return _POSIX_CHOWN_RESTRICTED;
+
+  case _PC_NO_TRUNC:
+    return _POSIX_NO_TRUNC;
+
+  case _PC_VDISABLE:
+    return _POSIX_VDISABLE;
+
+  case _PC_ASYNC_IO:
+  case _PC_PRIO_IO:
+  case _PC_REC_INCR_XFER_SIZE:
+  case
 _PC_REC_MAX_XFER_SIZE:
+  case _PC_SYMLINK_MAX:
+  case _PC_SYNC_IO:
+    return -1;
+
+  default:
+    libc_errno = EINVAL;
+    return -1;
+  }
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/unistd/linux/pathconf_utils.h b/libc/src/unistd/linux/pathconf_utils.h
new file mode 100644
index 00000000000000..2c0ec0ea292f42
--- /dev/null
+++ b/libc/src/unistd/linux/pathconf_utils.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for pathconf_utils ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_UNISTD_PATHCONF_UTILS_H
+#define LLVM_LIBC_SRC_UNISTD_PATHCONF_UTILS_H
+
+#include "src/sys/statvfs/linux/statfs_utils.h"
+
+namespace LIBC_NAMESPACE {
+
+long pathconfig(const statfs_utils::LinuxStatFs &s, int name);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_UNISTD_PATHCONF_UTILS_H
diff --git a/libc/src/unistd/linux/pread.cpp b/libc/src/unistd/linux/pread.cpp
index 11cefc5c2f3a89..247a7b4d9d5099 100644
--- a/libc/src/unistd/linux/pread.cpp
+++ b/libc/src/unistd/linux/pread.cpp
@@ -19,15 +19,21 @@ namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(ssize_t, pread,
                    (int fd, void *buf, size_t count, off_t offset)) {
-#ifdef LIBC_TARGET_ARCH_IS_RISCV32
-  static_assert(sizeof(off_t) == 8);
-  ssize_t ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(
-      SYS_pread64, fd, buf, count, (long)offset,
-      (long)(((uint64_t)(offset)) >> 32));
-#else
-  ssize_t ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(SYS_pread64, fd, buf,
-                                                      count, offset);
-#endif
+  ssize_t ret;
+  if constexpr (sizeof(long) == sizeof(uint32_t) &&
+                sizeof(off_t) == sizeof(uint64_t)) {
+    // This is a 32-bit system with a 64-bit offset, offset must be split.
+    const uint64_t bits = cpp::bit_cast<uint64_t>(offset);
+    const uint32_t lo = bits & UINT32_MAX;
+    const uint32_t hi = bits >> 32;
+    const long offset_low = cpp::bit_cast<long>(static_cast<unsigned long>(lo));
+    const long offset_high = cpp::bit_cast<long>(static_cast<unsigned long>(hi));
+    ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(SYS_pread64, fd, buf, count,
+                                                offset_low, offset_high);
+  } else {
+    ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(SYS_pread64, fd, buf, count,
+                                                offset);
+  }
 
   // The cast is important since there is a check that dereferences the pointer
   // which fails on void*.
   MSAN_UNPOISON(reinterpret_cast<char *>(buf), count);
diff --git a/libc/src/unistd/linux/pwrite.cpp b/libc/src/unistd/linux/pwrite.cpp
index 6c6a0b555ac133..b0540a09710e7c 100644
--- a/libc/src/unistd/linux/pwrite.cpp
+++ b/libc/src/unistd/linux/pwrite.cpp
@@ -19,15 +19,23 @@ namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(ssize_t, pwrite,
                    (int fd, const void *buf, size_t count, off_t offset)) {
-#ifdef LIBC_TARGET_ARCH_IS_RISCV32
-  static_assert(sizeof(off_t) == 8);
-  ssize_t ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(
-      SYS_pwrite64, fd, buf, count, (long)offset,
-      (long)(((uint64_t)(offset)) >> 32));
-#else
-  ssize_t ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(SYS_pwrite64, fd, buf,
-                                                      count, offset);
-#endif
+
+  ssize_t ret;
+  if constexpr (sizeof(long) == sizeof(uint32_t) &&
+                sizeof(off_t) == sizeof(uint64_t)) {
+    // This is a 32-bit system with a 64-bit offset, offset must be split.
+    const uint64_t bits = cpp::bit_cast<uint64_t>(offset);
+    const uint32_t lo = bits & UINT32_MAX;
+    const uint32_t hi = bits >> 32;
+    const long offset_low = cpp::bit_cast<long>(static_cast<unsigned long>(lo));
+    const long offset_high = cpp::bit_cast<long>(static_cast<unsigned long>(hi));
+    ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(SYS_pwrite64, fd, buf, count,
+                                                offset_low, offset_high);
+  } else {
+    ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(SYS_pwrite64, fd, buf, count,
+                                                offset);
+  }
+
   if (ret < 0) {
     libc_errno = static_cast<int>(-ret);
     return -1;
diff --git a/libc/src/unistd/pathconf.h b/libc/src/unistd/pathconf.h
new file mode 100644
index 00000000000000..6543d738a9b227
--- /dev/null
+++ b/libc/src/unistd/pathconf.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for pathconf ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_UNISTD_PATHCONF_H
+#define LLVM_LIBC_SRC_UNISTD_PATHCONF_H
+
+namespace LIBC_NAMESPACE {
+
+long pathconf(const char *path, int name);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_UNISTD_PATHCONF_H
diff --git a/libc/startup/linux/aarch64/tls.cpp b/libc/startup/linux/aarch64/tls.cpp
index f2579e821b1bf2..9f143f962892d8 100644
--- a/libc/startup/linux/aarch64/tls.cpp
+++ b/libc/startup/linux/aarch64/tls.cpp
@@ -80,7 +80,18 @@ void cleanup_tls(uintptr_t addr, uintptr_t size) {
 }
 
 bool set_thread_ptr(uintptr_t val) {
-  __arm_wsr64("tpidr_el0", val);
+// The PR for __arm_wsr64 support in GCC was merged on Dec 6, 2023, and it is
+// not yet usable in 13.3.0
+// https://github.com/gcc-mirror/gcc/commit/fc42900d21abd5eacb7537c3c8ffc5278d510195
+#if __has_builtin(__builtin_arm_wsr64)
+  __builtin_arm_wsr64("tpidr_el0", val);
+#elif __has_builtin(__builtin_aarch64_wsr)
+  __builtin_aarch64_wsr("tpidr_el0",
val); +#elif defined(__GNUC__) + asm volatile("msr tpidr_el0, %0" ::"r"(val)); +#else +#error "Unsupported compiler" +#endif return true; } } // namespace LIBC_NAMESPACE diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp index fbcedc163de10a..2f68c3faa0ad08 100644 --- a/libc/test/src/__support/File/file_test.cpp +++ b/libc/test/src/__support/File/file_test.cpp @@ -31,8 +31,8 @@ class StringFile : public File { static FileIOResult str_read(LIBC_NAMESPACE::File *f, void *data, size_t len); static FileIOResult str_write(LIBC_NAMESPACE::File *f, const void *data, size_t len); - static ErrorOr str_seek(LIBC_NAMESPACE::File *f, long offset, - int whence); + static ErrorOr str_seek(LIBC_NAMESPACE::File *f, off_t offset, + int whence); static int str_close(LIBC_NAMESPACE::File *f) { delete reinterpret_cast(f); return 0; @@ -93,8 +93,8 @@ FileIOResult StringFile::str_write(LIBC_NAMESPACE::File *f, const void *data, return i; } -ErrorOr StringFile::str_seek(LIBC_NAMESPACE::File *f, long offset, - int whence) { +ErrorOr StringFile::str_seek(LIBC_NAMESPACE::File *f, off_t offset, + int whence) { StringFile *sf = static_cast(f); if (whence == SEEK_SET) sf->pos = offset; diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index 35ca97b5de8af0..0dc7ae6aae2df6 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -2213,6 +2213,18 @@ add_fp_unittest( libc.src.math.f16sqrtl ) +add_fp_unittest( + cbrtf_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + cbrtf_test.cpp + DEPENDS + libc.src.math.cbrtf + libc.src.__support.FPUtil.fp_bits +) + add_subdirectory(generic) add_subdirectory(smoke) diff --git a/libc/test/src/math/cbrtf_test.cpp b/libc/test/src/math/cbrtf_test.cpp new file mode 100644 index 00000000000000..1d7d2189d52681 --- /dev/null +++ b/libc/test/src/math/cbrtf_test.cpp @@ -0,0 +1,42 @@ +//===-- Unittests for cbrtf 
-----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/math_macros.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/math/cbrtf.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" +#include "utils/MPFRWrapper/MPFRUtils.h" + +using LlvmLibcCbrtfTest = LIBC_NAMESPACE::testing::FPTest; + +namespace mpfr = LIBC_NAMESPACE::testing::mpfr; + +TEST_F(LlvmLibcCbrtfTest, InFloatRange) { + constexpr uint32_t COUNT = 100'000; + const uint32_t STEP = FPBits(inf).uintval() / COUNT; + for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { + float x = FPBits(v).get_val(); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Cbrt, x, + LIBC_NAMESPACE::cbrtf(x), 0.5); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Cbrt, -x, + LIBC_NAMESPACE::cbrtf(-x), 0.5); + } +} + +TEST_F(LlvmLibcCbrtfTest, SpecialValues) { + constexpr float INPUTS[] = { + 0x1.60451p2f, 0x1.31304cp1f, 0x1.d17cp2f, 0x1.bp-143f, 0x1.338cp2f, + }; + for (float v : INPUTS) { + float x = FPBits(v).get_val(); + mpfr::ForceRoundingMode r(mpfr::RoundingMode::Upward); + EXPECT_MPFR_MATCH(mpfr::Operation::Cbrt, x, LIBC_NAMESPACE::cbrtf(x), 0.5, + mpfr::RoundingMode::Upward); + } +} diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt index c5f75b51cbd9f6..6c10ea422109e7 100644 --- a/libc/test/src/math/exhaustive/CMakeLists.txt +++ b/libc/test/src/math/exhaustive/CMakeLists.txt @@ -486,3 +486,19 @@ add_fp_unittest( LINK_LIBRARIES -lpthread ) + +add_fp_unittest( + cbrtf_test + NO_RUN_POSTBUILD + NEED_MPFR + SUITE + libc_math_exhaustive_tests + SRCS + cbrtf_test.cpp + DEPENDS + .exhaustive_test + libc.src.math.cbrtf + 
libc.src.__support.FPUtil.fp_bits + LINK_LIBRARIES + -lpthread +) diff --git a/libc/test/src/math/exhaustive/cbrtf_test.cpp b/libc/test/src/math/exhaustive/cbrtf_test.cpp new file mode 100644 index 00000000000000..e4511e1f7ee1b9 --- /dev/null +++ b/libc/test/src/math/exhaustive/cbrtf_test.cpp @@ -0,0 +1,33 @@ +//===-- Exhaustive test for cbrtf -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "exhaustive_test.h" +#include "src/math/cbrtf.h" +#include "utils/MPFRWrapper/MPFRUtils.h" + +namespace mpfr = LIBC_NAMESPACE::testing::mpfr; + +using LlvmLibcCbrtfExhaustiveTest = + LlvmLibcUnaryOpExhaustiveMathTest; + +// Range: [0, Inf]; +static constexpr uint32_t POS_START = 0x0000'0000U; +static constexpr uint32_t POS_STOP = 0x7f80'0000U; + +TEST_F(LlvmLibcCbrtfExhaustiveTest, PostiveRange) { + test_full_range_all_roundings(POS_START, POS_STOP); +} + +// Range: [-Inf, 0]; +static constexpr uint32_t NEG_START = 0x8000'0000U; +static constexpr uint32_t NEG_STOP = 0xff80'0000U; + +TEST_F(LlvmLibcCbrtfExhaustiveTest, NegativeRange) { + test_full_range_all_roundings(NEG_START, NEG_STOP); +} diff --git a/libc/test/src/math/exhaustive/exhaustive_test.h b/libc/test/src/math/exhaustive/exhaustive_test.h index 6f0c78ebefa470..94489d2e55daa7 100644 --- a/libc/test/src/math/exhaustive/exhaustive_test.h +++ b/libc/test/src/math/exhaustive/exhaustive_test.h @@ -61,9 +61,9 @@ struct UnaryOpChecker : public virtual LIBC_NAMESPACE::testing::Test { TEST_MPFR_MATCH_ROUNDING_SILENTLY(Op, x, Func(x), 0.5, rounding); failed += (!correct); // Uncomment to print out failed values. 
- // if (!correct) { - // EXPECT_MPFR_MATCH_ROUNDING(Op, x, Func(x), 0.5, rounding); - // } + if (!correct) { + EXPECT_MPFR_MATCH_ROUNDING(Op, x, Func(x), 0.5, rounding); + } } while (bits++ < stop); return failed; } @@ -97,9 +97,9 @@ struct BinaryOpChecker : public virtual LIBC_NAMESPACE::testing::Test { 0.5, rounding); failed += (!correct); // Uncomment to print out failed values. - // if (!correct) { - // EXPECT_MPFR_MATCH_ROUNDING(Op, input, Func(x, y), 0.5, rounding); - // } + if (!correct) { + EXPECT_MPFR_MATCH_ROUNDING(Op, input, Func(x, y), 0.5, rounding); + } } while (ybits++ < y_stop); } while (xbits++ < x_stop); return failed; @@ -187,7 +187,8 @@ struct LlvmLibcExhaustiveMathTest std::stringstream msg; msg << "Test failed for " << std::dec << failed_in_range << " inputs in range: "; - explain_failed_range(msg, start, stop, extra_range_bounds...); + explain_failed_range(msg, range_begin, range_end, + extra_range_bounds...); msg << "\n"; std::cerr << msg.str() << std::flush; diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index b72d4b30787a0d..7f1bc0c204c68c 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -3961,3 +3961,13 @@ add_fp_unittest( DEPENDS libc.src.math.tan ) + +add_fp_unittest( + cbrtf_test + SUITE + libc-math-smoke-tests + SRCS + cbrtf_test.cpp + DEPENDS + libc.src.math.cbrtf +) diff --git a/libc/test/src/math/smoke/cbrtf_test.cpp b/libc/test/src/math/smoke/cbrtf_test.cpp new file mode 100644 index 00000000000000..a68e57744bd0e7 --- /dev/null +++ b/libc/test/src/math/smoke/cbrtf_test.cpp @@ -0,0 +1,33 @@ +//===-- Unittests for cbrtf -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/cbrtf.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcCbrtfTest = LIBC_NAMESPACE::testing::FPTest; + +using LIBC_NAMESPACE::testing::tlog; + +TEST_F(LlvmLibcCbrtfTest, SpecialNumbers) { + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::cbrtf(aNaN)); + EXPECT_FP_EQ_ALL_ROUNDING(inf, LIBC_NAMESPACE::cbrtf(inf)); + EXPECT_FP_EQ_ALL_ROUNDING(neg_inf, LIBC_NAMESPACE::cbrtf(neg_inf)); + EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::cbrtf(zero)); + EXPECT_FP_EQ_ALL_ROUNDING(neg_zero, LIBC_NAMESPACE::cbrtf(neg_zero)); + EXPECT_FP_EQ_ALL_ROUNDING(1.0f, LIBC_NAMESPACE::cbrtf(1.0f)); + EXPECT_FP_EQ_ALL_ROUNDING(-1.0f, LIBC_NAMESPACE::cbrtf(-1.0f)); + EXPECT_FP_EQ_ALL_ROUNDING(2.0f, LIBC_NAMESPACE::cbrtf(8.0f)); + EXPECT_FP_EQ_ALL_ROUNDING(-2.0f, LIBC_NAMESPACE::cbrtf(-8.0f)); + EXPECT_FP_EQ_ALL_ROUNDING(3.0f, LIBC_NAMESPACE::cbrtf(27.0f)); + EXPECT_FP_EQ_ALL_ROUNDING(-3.0f, LIBC_NAMESPACE::cbrtf(-27.0f)); + EXPECT_FP_EQ_ALL_ROUNDING(5.0f, LIBC_NAMESPACE::cbrtf(125.0f)); + EXPECT_FP_EQ_ALL_ROUNDING(-5.0f, LIBC_NAMESPACE::cbrtf(-125.0f)); + EXPECT_FP_EQ_ALL_ROUNDING(0x1.0p42f, LIBC_NAMESPACE::cbrtf(0x1.0p126f)); + EXPECT_FP_EQ_ALL_ROUNDING(-0x1.0p42f, LIBC_NAMESPACE::cbrtf(-0x1.0p126f)); +} diff --git a/libc/test/src/unistd/CMakeLists.txt b/libc/test/src/unistd/CMakeLists.txt index de3e8d9ccbb626..1a1e01e50f4e88 100644 --- a/libc/test/src/unistd/CMakeLists.txt +++ b/libc/test/src/unistd/CMakeLists.txt @@ -446,6 +446,36 @@ add_libc_unittest( libc.src.unistd.sysconf ) +add_libc_unittest( + fpathconf_test + SUITE + libc_unistd_unittests + SRCS + fpathconf_test.cpp + DEPENDS + libc.hdr.limits_macros + libc.hdr.unistd_macros + libc.hdr.sys_stat_macros + libc.src.unistd.fpathconf + libc.src.fcntl.open + libc.src.unistd.close +) + +add_libc_unittest( + pathconf_test + SUITE + 
libc_unistd_unittests + SRCS + pathconf_test.cpp + DEPENDS + libc.hdr.limits_macros + libc.hdr.unistd_macros + libc.hdr.sys_stat_macros + libc.src.unistd.pathconf + libc.src.fcntl.open + libc.src.unistd.close +) + add_libc_test( getopt_test HERMETIC_TEST_ONLY # Uses libc's own stderr diff --git a/libc/test/src/unistd/fpathconf_test.cpp b/libc/test/src/unistd/fpathconf_test.cpp new file mode 100644 index 00000000000000..fe63e5e0859731 --- /dev/null +++ b/libc/test/src/unistd/fpathconf_test.cpp @@ -0,0 +1,30 @@ +//===-- Unittests for fpathconf -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "hdr/fcntl_macros.h" +#include "hdr/limits_macros.h" +#include "hdr/sys_stat_macros.h" +#include "hdr/unistd_macros.h" +#include "src/fcntl/open.h" +#include "src/unistd/close.h" +#include "src/unistd/fpathconf.h" +#include "test/UnitTest/ErrnoSetterMatcher.h" +#include "test/UnitTest/Test.h" + +using namespace LIBC_NAMESPACE::testing::ErrnoSetterMatcher; + +TEST(LlvmLibcPipeTest, SmokeTest) { + constexpr const char *FILENAME = "fpathconf.test"; + auto TEST_FILE = libc_make_test_file_path(FILENAME); + int fd = LIBC_NAMESPACE::open(TEST_FILE, O_WRONLY | O_CREAT, S_IRWXU); + EXPECT_EQ(LIBC_NAMESPACE::fpathconf(fd, _PC_SYNC_IO), -1l); + EXPECT_EQ(LIBC_NAMESPACE::fpathconf(fd, _PC_PATH_MAX), + static_cast(_POSIX_PATH_MAX)); + LIBC_NAMESPACE::close(fd); +} + +// TODO: Functionality tests diff --git a/libc/test/src/unistd/pathconf_test.cpp b/libc/test/src/unistd/pathconf_test.cpp new file mode 100644 index 00000000000000..3dc4b2c6027cba --- /dev/null +++ b/libc/test/src/unistd/pathconf_test.cpp @@ -0,0 +1,30 @@ +//===-- Unittests for pathconf 
--------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "hdr/fcntl_macros.h" +#include "hdr/limits_macros.h" +#include "hdr/sys_stat_macros.h" +#include "hdr/unistd_macros.h" +#include "src/fcntl/open.h" +#include "src/unistd/close.h" +#include "src/unistd/pathconf.h" +#include "test/UnitTest/ErrnoSetterMatcher.h" +#include "test/UnitTest/Test.h" + +using namespace LIBC_NAMESPACE::testing::ErrnoSetterMatcher; + +TEST(LlvmLibcPipeTest, SmokeTest) { + constexpr const char *FILENAME = "pathconf.test"; + auto TEST_FILE = libc_make_test_file_path(FILENAME); + int fd = LIBC_NAMESPACE::open(TEST_FILE, O_WRONLY | O_CREAT, S_IRWXU); + EXPECT_EQ(LIBC_NAMESPACE::pathconf(FILENAME, _PC_SYNC_IO), -1l); + EXPECT_EQ(LIBC_NAMESPACE::pathconf(FILENAME, _PC_PATH_MAX), + static_cast(_POSIX_PATH_MAX)); + LIBC_NAMESPACE::close(fd); +} + +// TODO: Functionality tests diff --git a/libc/utils/HdrGen/CMakeLists.txt b/libc/utils/HdrGen/CMakeLists.txt index 0ec1cba542d400..47f845b9f9a5bb 100644 --- a/libc/utils/HdrGen/CMakeLists.txt +++ b/libc/utils/HdrGen/CMakeLists.txt @@ -1,6 +1,8 @@ include(TableGen) -set(LLVM_LINK_COMPONENTS Support) +if (NOT LLVM_LINK_LLVM_DYLIB) + set(LLVM_LINK_COMPONENTS Support) +endif() add_tablegen(libc-hdrgen LIBC Command.h diff --git a/libc/utils/LibcTableGenUtil/CMakeLists.txt b/libc/utils/LibcTableGenUtil/CMakeLists.txt index dca6a7bb830655..9421383394a359 100644 --- a/libc/utils/LibcTableGenUtil/CMakeLists.txt +++ b/libc/utils/LibcTableGenUtil/CMakeLists.txt @@ -1,9 +1,13 @@ +if (NOT LLVM_LINK_LLVM_DYLIB) + set(flags "DISABLE_LLVM_LINK_LLVM_DYLIB;LINK_COMPONENTS;Support;TableGen") +else() + set(flags "LINK_COMPONENTS;TableGen") +endif() add_llvm_library( 
LibcTableGenUtil APIIndexer.cpp APIIndexer.h - DISABLE_LLVM_LINK_LLVM_DYLIB - LINK_COMPONENTS Support TableGen + ${flags} ) target_include_directories(LibcTableGenUtil PUBLIC ${LIBC_SOURCE_DIR}) target_include_directories(LibcTableGenUtil PRIVATE ${LLVM_INCLUDE_DIR} ${LLVM_MAIN_INCLUDE_DIR}) diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp index 6548fc36cb6b4e..99a240e555af25 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.cpp +++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp @@ -221,6 +221,12 @@ class MPFRNumber { return result; } + MPFRNumber cbrt() const { + MPFRNumber result(*this); + mpfr_cbrt(result.value, value, mpfr_rounding); + return result; + } + MPFRNumber ceil() const { MPFRNumber result(*this); mpfr_ceil(result.value, value); @@ -702,6 +708,8 @@ unary_operation(Operation op, InputType input, unsigned int precision, return mpfrInput.atan(); case Operation::Atanh: return mpfrInput.atanh(); + case Operation::Cbrt: + return mpfrInput.cbrt(); case Operation::Ceil: return mpfrInput.ceil(); case Operation::Cos: diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h index 002dc919396e72..fd0d72472bf7ed 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.h +++ b/libc/utils/MPFRWrapper/MPFRUtils.h @@ -31,6 +31,7 @@ enum class Operation : int { Asinh, Atan, Atanh, + Cbrt, Ceil, Cos, Cosh, diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index 72f624fa746b41..2eff42918886c4 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -402,6 +402,8 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_bitset`` ``202306L`` ---------------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_new`` *unimplemented* + ---------------------------------------------------------- ----------------- ``__cpp_lib_constrained_equality`` *unimplemented* 
---------------------------------------------------------- ----------------- ``__cpp_lib_copyable_function`` *unimplemented* @@ -436,12 +438,20 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_hazard_pointer`` *unimplemented* ---------------------------------------------------------- ----------------- + ``__cpp_lib_inplace_vector`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_is_virtual_base_of`` *unimplemented* + ---------------------------------------------------------- ----------------- ``__cpp_lib_is_within_lifetime`` *unimplemented* ---------------------------------------------------------- ----------------- ``__cpp_lib_linalg`` *unimplemented* ---------------------------------------------------------- ----------------- + ``__cpp_lib_optional_range_support`` *unimplemented* + ---------------------------------------------------------- ----------------- ``__cpp_lib_out_ptr`` *unimplemented* ---------------------------------------------------------- ----------------- + ``__cpp_lib_philox_engine`` *unimplemented* + ---------------------------------------------------------- ----------------- ``__cpp_lib_ranges_concat`` *unimplemented* ---------------------------------------------------------- ----------------- ``__cpp_lib_ratio`` ``202306L`` @@ -452,6 +462,8 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_saturation_arithmetic`` ``202311L`` ---------------------------------------------------------- ----------------- + ``__cpp_lib_senders`` *unimplemented* + ---------------------------------------------------------- ----------------- ``__cpp_lib_smart_ptr_owner_equality`` *unimplemented* ---------------------------------------------------------- ----------------- ``__cpp_lib_span_at`` ``202311L`` diff --git a/libcxx/docs/Status/Cxx2c.rst b/libcxx/docs/Status/Cxx2c.rst index 5f459b4b3e4e6a..03a6eeaa40c799 
100644 --- a/libcxx/docs/Status/Cxx2c.rst +++ b/libcxx/docs/Status/Cxx2c.rst @@ -42,6 +42,7 @@ Paper Status .. [#note-P2510R3] This paper is applied as DR against C++20. (MSVC STL and libstdc++ will do the same.) .. [#note-P3142R0] This paper is applied as DR against C++23. (MSVC STL and libstdc++ will do the same.) .. [#note-P2944R3] Implemented comparisons for ``reference_wrapper`` only. + .. [#note-P2422R1] Libc++ keeps the ``nodiscard`` attributes as a conforming extension. .. _issues-status-cxx2c: diff --git a/libcxx/docs/Status/Cxx2cIssues.csv b/libcxx/docs/Status/Cxx2cIssues.csv index 8d24457186310c..18cbe8f8b738a7 100644 --- a/libcxx/docs/Status/Cxx2cIssues.csv +++ b/libcxx/docs/Status/Cxx2cIssues.csv @@ -62,7 +62,20 @@ "`4053 `__","Unary call to ``std::views::repeat`` does not decay the argument","Tokyo March 2024","|Complete|","19.0","|ranges|" "`4054 `__","Repeating a ``repeat_view`` should repeat the view","Tokyo March 2024","|Complete|","19.0","|ranges|" "","","","","","" +"`3944 `__","Formatters converting sequences of ``char`` to sequences of ``wchar_t``","St. Louis June 2024","","","|format|" +"`4060 `__","``submdspan`` preconditions do not forbid creating invalid pointer","St. Louis June 2024","","","" +"`4061 `__","Should ``std::basic_format_context`` be default-constructible/copyable/movable?","St. Louis June 2024","","","|format|" +"`4071 `__","``reference_wrapper`` comparisons are not SFINAE-friendly","St. Louis June 2024","|Complete|","19.0","" +"`4074 `__","``compatible-joinable-ranges`` is underconstrained","St. Louis June 2024","","","|ranges|" +"`4076 `__","``concat_view`` should be freestanding","St. Louis June 2024","","","" +"`4079 `__","Missing Preconditions in ``concat_view::iterator``\`s conversion constructor","St. Louis June 2024","","","|ranges|" +"`4082 `__","``views::concat(r)`` is well-formed when ``r`` is an ``output_range``","St. 
Louis June 2024","","","|ranges|" +"`4083 `__","``views::as_rvalue`` should reject non-input ranges","St. Louis June 2024","","","|ranges|" +"`4096 `__","``views::iota(views::iota(0))`` should be rejected","St. Louis June 2024","","","|ranges|" +"`4098 `__","``views::adjacent<0>`` should reject non-forward ranges","St. Louis June 2024","","","|ranges|" +"`4105 `__","``ranges::ends_with``\`s Returns misses difference casting","St. Louis June 2024","","","|ranges|" +"`4106 `__","``basic_format_args`` should not be default-constructible","St. Louis June 2024","","","|format|" +"","","","","","" "`3343 `__","Ordering of calls to ``unlock()`` and ``notify_all()`` in Effects element of ``notify_all_at_thread_exit()`` should be reversed","Not Yet Adopted","|Complete|","16.0","" "XXXX","","The sys_info range should be affected by save","Not Yet Adopted","|Complete|","19.0" -"`4071 `__","","``reference_wrapper`` comparisons are not SFINAE-friendly","Not Yet Adopted","|Complete|","19.0" "","","","","","" diff --git a/libcxx/docs/Status/Cxx2cPapers.csv b/libcxx/docs/Status/Cxx2cPapers.csv index ea060f18acdeea..48d45e24a05b2f 100644 --- a/libcxx/docs/Status/Cxx2cPapers.csv +++ b/libcxx/docs/Status/Cxx2cPapers.csv @@ -63,3 +63,16 @@ "`P2642R6 `__","LWG","Padded ``mdspan`` layouts","Tokyo March 2024","","","" "`P3029R1 `__","LWG","Better ``mdspan``'s CTAD","Tokyo March 2024","|Complete|","19.0","" "","","","","","","" +"`P2747R2 `__","CWG","``constexpr`` placement new","St. Louis June 2024","","","" +"`P2997R1 `__","LWG","Removing the common reference requirement from the indirectly invocable concepts","St. Louis June 2024","","","" +"`P2389R2 `__","LWG","``dextents`` Index Type Parameter","St. Louis June 2024","","","" +"`P3168R2 `__","LWG","Give ``std::optional`` Range Support","St. Louis June 2024","","","|ranges|" +"`P3217R0 `__","LWG","Adjoints to 'Enabling list-initialization for algorithms': find_last","St. 
Louis June 2024","","","" +"`P2985R0 `__","LWG","A type trait for detecting virtual base classes","St. Louis June 2024","","","" +"`P0843R14 `__","LWG","``inplace_vector``","St. Louis June 2024","","","" +"`P3235R3 `__","LWG","``std::print`` more types faster with less memory","St. Louis June 2024","","","|format| |DR|" +"`P2968R2 `__","LWG","Make ``std::ignore`` a first-class object","St. Louis June 2024","","","" +"`P2075R6 `__","LWG","Philox as an extension of the C++ RNG engines","St. Louis June 2024","","","" +"`P2422R1 `__","LWG","Remove ``nodiscard`` annotations from the standard library specification","St. Louis June 2024","|Complete| [#note-P2422R1]_","19.0","" +"`P2300R10 `__","LWG","``std::execution``","St. Louis June 2024","","","" +"","","","","","","" diff --git a/libcxx/docs/index.rst b/libcxx/docs/index.rst index 743f99297d17d2..69a9e575cfe7c8 100644 --- a/libcxx/docs/index.rst +++ b/libcxx/docs/index.rst @@ -71,7 +71,7 @@ iOS, watchOS, and tvOS, Google Search, the Android operating system, and FreeBSD user base of over 1 billion daily active users. Since its inception, libc++ has focused on delivering high performance, standards-conformance, and portability. It has -been extensively tested and optimized, making it robust and production ready. libc++ fully implements C++11 and C++14, +been extensively tested and optimized, making it robust and production ready. libc++ fully implements C++11 and C++14, with C++17, C++20, C++23, and C++26 features being actively developed and making steady progress. 
libc++ is continuously integrated and tested on a wide range of platforms and configurations, ensuring its reliability @@ -137,7 +137,7 @@ Compiler Versions Restrictions Support policy Clang 17, 18, 19-git latest two stable releases per `LLVM's release page `_ and the development version AppleClang 15 latest stable release per `Xcode's release page `_ Open XL 17.1 (AIX) latest stable release per `Open XL's documentation page `_ -GCC 13 In C++11 or later only latest stable release per `GCC's release page `_ +GCC 14 In C++11 or later only latest stable release per `GCC's release page `_ ============ =============== ========================== ===================== Libc++ also supports common platforms and architectures: diff --git a/libcxx/include/__compare/synth_three_way.h b/libcxx/include/__compare/synth_three_way.h index 6420d1362db0ce..e48ce497998368 100644 --- a/libcxx/include/__compare/synth_three_way.h +++ b/libcxx/include/__compare/synth_three_way.h @@ -25,12 +25,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD // [expos.only.func] -// TODO MODULES restore the lamba to match the Standard. 
-// See https://github.com/llvm/llvm-project/issues/57222 -//_LIBCPP_HIDE_FROM_ABI inline constexpr auto __synth_three_way = -// [](const _Tp& __t, const _Up& __u) -template -_LIBCPP_HIDE_FROM_ABI constexpr auto __synth_three_way(const _Tp& __t, const _Up& __u) +_LIBCPP_HIDE_FROM_ABI inline constexpr auto __synth_three_way = [](const _Tp& __t, const _Up& __u) requires requires { { __t < __u } -> __boolean_testable; { __u < __t } -> __boolean_testable; @@ -45,7 +40,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr auto __synth_three_way(const _Tp& __t, const _Up return weak_ordering::greater; return weak_ordering::equivalent; } -} +}; template using __synth_three_way_result = decltype(std::__synth_three_way(std::declval<_Tp&>(), std::declval<_Up&>())); diff --git a/libcxx/include/__configuration/compiler.h b/libcxx/include/__configuration/compiler.h index a9fc3498220ab9..80ece22bb50bd6 100644 --- a/libcxx/include/__configuration/compiler.h +++ b/libcxx/include/__configuration/compiler.h @@ -41,8 +41,8 @@ # warning "Libc++ only supports AppleClang 15 and later" # endif # elif defined(_LIBCPP_GCC_VER) -# if _LIBCPP_GCC_VER < 1300 -# warning "Libc++ only supports GCC 13 and later" +# if _LIBCPP_GCC_VER < 1400 +# warning "Libc++ only supports GCC 14 and later" # endif # endif diff --git a/libcxx/include/__locale_dir/locale_base_api.h b/libcxx/include/__locale_dir/locale_base_api.h index 2355b4a840d734..8c000c558c5279 100644 --- a/libcxx/include/__locale_dir/locale_base_api.h +++ b/libcxx/include/__locale_dir/locale_base_api.h @@ -95,18 +95,4 @@ except that locale_t is used instead of the current global locale. The variadic functions may be implemented as templates with a parameter pack instead of variadic functions. */ -/* -// TODO: These symbols are never actually used, but defined by one or more implementations. They should be removed. 
-long strtol_l(const char* str, char** str_end, locale_t); -unsigned long strtoul_l(const char* str, char** str_end, locale_t); -long long wcstoll_l(const wchar_t* str, wchar_t** str_end, int base, locale_t); -unsigned long long wcstoull_l(const wchar_t* str, wchar_t** str_end, int base, locale_t); -long double wcstold_l(const wchar_t* str, wchar_t** str_end, int base, locale_t); -int sprintf_l(char* str, const char* format, locale_t, ...); -int vsprintf_l(char* str, const char* format, locale_t, va_list); -int vsnprintf_l(char* str, size_t size, const char* format, locale_t, va_list); -int isblank_l(int ch, locale_t); - -*/ - #endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_H diff --git a/libcxx/include/__locale_dir/locale_base_api/android.h b/libcxx/include/__locale_dir/locale_base_api/android.h index a33d2539f06548..9965d8bbf6a2ec 100644 --- a/libcxx/include/__locale_dir/locale_base_api/android.h +++ b/libcxx/include/__locale_dir/locale_base_api/android.h @@ -10,37 +10,29 @@ #ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_ANDROID_H #define _LIBCPP___LOCALE_LOCALE_BASE_API_ANDROID_H -#if defined(__BIONIC__) +#include -# ifdef __cplusplus +// FIXME: Is this actually required? extern "C" { -# endif - -# include -# include - -# ifdef __cplusplus +#include } -# endif -# if defined(__ANDROID__) - -# include -# if __ANDROID_API__ < 21 -# include <__support/xlocale/__posix_l_fallback.h> -# endif +#include +#if __ANDROID_API__ < 21 +# include <__support/xlocale/__posix_l_fallback.h> +#endif // If we do not have this header, we are in a platform build rather than an NDK // build, which will always be at least as new as the ToT NDK, in which case we // don't need any of the inlines below since libc provides them. 
-# if __has_include() -# include +#if __has_include() +# include // In NDK versions later than 16, locale-aware functions are provided by // legacy_stdlib_inlines.h -# if __NDK_MAJOR__ <= 16 -# if __ANDROID_API__ < 21 -# include <__support/xlocale/__strtonum_fallback.h> -# elif __ANDROID_API__ < 26 +# if __NDK_MAJOR__ <= 16 +# if __ANDROID_API__ < 21 +# include <__support/xlocale/__strtonum_fallback.h> +# elif __ANDROID_API__ < 26 inline _LIBCPP_HIDE_FROM_ABI float strtof_l(const char* __nptr, char** __endptr, locale_t) { return ::strtof(__nptr, __endptr); @@ -50,15 +42,9 @@ inline _LIBCPP_HIDE_FROM_ABI double strtod_l(const char* __nptr, char** __endptr return ::strtod(__nptr, __endptr); } -inline _LIBCPP_HIDE_FROM_ABI long strtol_l(const char* __nptr, char** __endptr, int __base, locale_t) { - return ::strtol(__nptr, __endptr, __base); -} - -# endif // __ANDROID_API__ < 26 +# endif // __ANDROID_API__ < 26 -# endif // __NDK_MAJOR__ <= 16 -# endif // __has_include() -# endif // defined(__ANDROID__) +# endif // __NDK_MAJOR__ <= 16 +#endif // __has_include() -#endif // defined(__BIONIC__) #endif // _LIBCPP___LOCALE_LOCALE_BASE_API_ANDROID_H diff --git a/libcxx/include/__locale_dir/locale_base_api/fuchsia.h b/libcxx/include/__locale_dir/locale_base_api/fuchsia.h index f999bead234e5c..4c3440f981c6d0 100644 --- a/libcxx/include/__locale_dir/locale_base_api/fuchsia.h +++ b/libcxx/include/__locale_dir/locale_base_api/fuchsia.h @@ -10,13 +10,9 @@ #ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_FUCHSIA_H #define _LIBCPP___LOCALE_LOCALE_BASE_API_FUCHSIA_H -#if defined(__Fuchsia__) - -# include <__support/xlocale/__posix_l_fallback.h> -# include <__support/xlocale/__strtonum_fallback.h> -# include -# include - -#endif // defined(__Fuchsia__) +#include <__support/xlocale/__posix_l_fallback.h> +#include <__support/xlocale/__strtonum_fallback.h> +#include +#include #endif // _LIBCPP___LOCALE_LOCALE_BASE_API_FUCHSIA_H diff --git a/libcxx/include/__locale_dir/locale_base_api/ibm.h 
b/libcxx/include/__locale_dir/locale_base_api/ibm.h index 5e89a1dc1e8a6d..01af20194428b9 100644 --- a/libcxx/include/__locale_dir/locale_base_api/ibm.h +++ b/libcxx/include/__locale_dir/locale_base_api/ibm.h @@ -58,11 +58,6 @@ inline _LIBCPP_HIDE_FROM_ABI long long strtoll_l(const char* __nptr, char** __en return ::strtoll(__nptr, __endptr, __base); } -inline _LIBCPP_HIDE_FROM_ABI long strtol_l(const char* __nptr, char** __endptr, int __base, locale_t locale) { - __setAndRestore __newloc(locale); - return ::strtol(__nptr, __endptr, __base); -} - inline _LIBCPP_HIDE_FROM_ABI double strtod_l(const char* __nptr, char** __endptr, locale_t locale) { __setAndRestore __newloc(locale); return ::strtod(__nptr, __endptr); @@ -84,11 +79,6 @@ strtoull_l(const char* __nptr, char** __endptr, int __base, locale_t locale) { return ::strtoull(__nptr, __endptr, __base); } -inline _LIBCPP_HIDE_FROM_ABI unsigned long strtoul_l(const char* __nptr, char** __endptr, int __base, locale_t locale) { - __setAndRestore __newloc(locale); - return ::strtoul(__nptr, __endptr, __base); -} - inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 2, 0) int vasprintf(char** strp, const char* fmt, va_list ap) { const size_t buff_size = 256; diff --git a/libcxx/include/__locale_dir/locale_base_api/musl.h b/libcxx/include/__locale_dir/locale_base_api/musl.h index b689200baee71f..bf7b849d586342 100644 --- a/libcxx/include/__locale_dir/locale_base_api/musl.h +++ b/libcxx/include/__locale_dir/locale_base_api/musl.h @@ -28,17 +28,4 @@ inline _LIBCPP_HIDE_FROM_ABI unsigned long long strtoull_l(const char* __nptr, c return ::strtoull(__nptr, __endptr, __base); } -inline _LIBCPP_HIDE_FROM_ABI long long wcstoll_l(const wchar_t* __nptr, wchar_t** __endptr, int __base, locale_t) { - return ::wcstoll(__nptr, __endptr, __base); -} - -inline _LIBCPP_HIDE_FROM_ABI unsigned long long -wcstoull_l(const wchar_t* __nptr, wchar_t** __endptr, int __base, locale_t) { - return ::wcstoull(__nptr, __endptr, 
__base); -} - -inline _LIBCPP_HIDE_FROM_ABI long double wcstold_l(const wchar_t* __nptr, wchar_t** __endptr, locale_t) { - return ::wcstold(__nptr, __endptr); -} - #endif // _LIBCPP___LOCALE_LOCALE_BASE_API_MUSL_H diff --git a/libcxx/include/__locale_dir/locale_base_api/newlib.h b/libcxx/include/__locale_dir/locale_base_api/newlib.h index 8d030cb73209f8..a8c1cff16e6d80 100644 --- a/libcxx/include/__locale_dir/locale_base_api/newlib.h +++ b/libcxx/include/__locale_dir/locale_base_api/newlib.h @@ -9,14 +9,4 @@ #ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_NEWLIB_H #define _LIBCPP___LOCALE_LOCALE_BASE_API_NEWLIB_H -#if defined(_NEWLIB_VERSION) - -# if !defined(__NEWLIB__) || __NEWLIB__ < 2 || __NEWLIB__ == 2 && __NEWLIB_MINOR__ < 5 -# include <__support/xlocale/__nop_locale_mgmt.h> -# include <__support/xlocale/__posix_l_fallback.h> -# include <__support/xlocale/__strtonum_fallback.h> -# endif - -#endif // _NEWLIB_VERSION - #endif // _LIBCPP___LOCALE_LOCALE_BASE_API_NEWLIB_H diff --git a/libcxx/include/__locale_dir/locale_base_api/openbsd.h b/libcxx/include/__locale_dir/locale_base_api/openbsd.h index ad999f5dfcad65..0c05d6a0f78874 100644 --- a/libcxx/include/__locale_dir/locale_base_api/openbsd.h +++ b/libcxx/include/__locale_dir/locale_base_api/openbsd.h @@ -16,12 +16,4 @@ #include #include -inline _LIBCPP_HIDE_FROM_ABI long strtol_l(const char* __nptr, char** __endptr, int __base, locale_t) { - return ::strtol(__nptr, __endptr, __base); -} - -inline _LIBCPP_HIDE_FROM_ABI unsigned long strtoul_l(const char* __nptr, char** __endptr, int __base, locale_t) { - return ::strtoul(__nptr, __endptr, __base); -} - #endif // _LIBCPP___LOCALE_LOCALE_BASE_API_OPENBSD_H diff --git a/libcxx/include/__locale_dir/locale_base_api/win32.h b/libcxx/include/__locale_dir/locale_base_api/win32.h index 5dfacfb9c1ee9d..f66baffb692045 100644 --- a/libcxx/include/__locale_dir/locale_base_api/win32.h +++ b/libcxx/include/__locale_dir/locale_base_api/win32.h @@ -225,15 +225,11 @@ 
_LIBCPP_EXPORTED_FROM_ABI size_t strftime_l(char* ret, size_t n, const char* for # define strftime_l _strftime_l #endif #define sscanf_l(__s, __l, __f, ...) _sscanf_l(__s, __f, __l, __VA_ARGS__) -#define sprintf_l(__s, __l, __f, ...) _sprintf_l(__s, __f, __l, __VA_ARGS__) -#define vsprintf_l(__s, __l, __f, ...) _vsprintf_l(__s, __f, __l, __VA_ARGS__) -#define vsnprintf_l(__s, __n, __l, __f, ...) _vsnprintf_l(__s, __n, __f, __l, __VA_ARGS__) _LIBCPP_EXPORTED_FROM_ABI int snprintf_l(char* __ret, size_t __n, locale_t __loc, const char* __format, ...); _LIBCPP_EXPORTED_FROM_ABI int asprintf_l(char** __ret, locale_t __loc, const char* __format, ...); _LIBCPP_EXPORTED_FROM_ABI int vasprintf_l(char** __ret, locale_t __loc, const char* __format, va_list __ap); // not-so-pressing FIXME: use locale to determine blank characters -inline int isblank_l(int __c, locale_t /*loc*/) { return (__c == ' ' || __c == '\t'); } inline int iswblank_l(wint_t __c, locale_t /*loc*/) { return (__c == L' ' || __c == L'\t'); } #endif // _LIBCPP___LOCALE_LOCALE_BASE_API_WIN32_H diff --git a/libcxx/include/__support/xlocale/__posix_l_fallback.h b/libcxx/include/__support/xlocale/__posix_l_fallback.h index 9c3c99e37ecc0c..8a3a6f27f48dde 100644 --- a/libcxx/include/__support/xlocale/__posix_l_fallback.h +++ b/libcxx/include/__support/xlocale/__posix_l_fallback.h @@ -29,8 +29,6 @@ inline _LIBCPP_HIDE_FROM_ABI int isalnum_l(int __c, locale_t) { return ::isalnum inline _LIBCPP_HIDE_FROM_ABI int isalpha_l(int __c, locale_t) { return ::isalpha(__c); } -inline _LIBCPP_HIDE_FROM_ABI int isblank_l(int __c, locale_t) { return ::isblank(__c); } - inline _LIBCPP_HIDE_FROM_ABI int iscntrl_l(int __c, locale_t) { return ::iscntrl(__c); } inline _LIBCPP_HIDE_FROM_ABI int isdigit_l(int __c, locale_t) { return ::isdigit(__c); } diff --git a/libcxx/include/__support/xlocale/__strtonum_fallback.h b/libcxx/include/__support/xlocale/__strtonum_fallback.h index 5dd59500c592b1..b7eef5210ed374 100644 --- 
a/libcxx/include/__support/xlocale/__strtonum_fallback.h +++ b/libcxx/include/__support/xlocale/__strtonum_fallback.h @@ -42,19 +42,4 @@ inline _LIBCPP_HIDE_FROM_ABI unsigned long long strtoull_l(const char* __nptr, c return ::strtoull(__nptr, __endptr, __base); } -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -inline _LIBCPP_HIDE_FROM_ABI long long wcstoll_l(const wchar_t* __nptr, wchar_t** __endptr, int __base, locale_t) { - return ::wcstoll(__nptr, __endptr, __base); -} - -inline _LIBCPP_HIDE_FROM_ABI unsigned long long -wcstoull_l(const wchar_t* __nptr, wchar_t** __endptr, int __base, locale_t) { - return ::wcstoull(__nptr, __endptr, __base); -} - -inline _LIBCPP_HIDE_FROM_ABI long double wcstold_l(const wchar_t* __nptr, wchar_t** __endptr, locale_t) { - return ::wcstold(__nptr, __endptr); -} -#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS - #endif // _LIBCPP___SUPPORT_XLOCALE_STRTONUM_FALLBACK_H diff --git a/libcxx/include/array b/libcxx/include/array index accfe15533e98a..6ffde852f48027 100644 --- a/libcxx/include/array +++ b/libcxx/include/array @@ -423,7 +423,7 @@ template _LIBCPP_HIDE_FROM_ABI constexpr __synth_three_way_result<_Tp> operator<=>(const array<_Tp, _Size>& __x, const array<_Tp, _Size>& __y) { return std::lexicographical_compare_three_way( - __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way<_Tp, _Tp>); + __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); } #endif // _LIBCPP_STD_VER <= 17 diff --git a/libcxx/include/chrono b/libcxx/include/chrono index c66771ffbad1a7..23441ddb239865 100644 --- a/libcxx/include/chrono +++ b/libcxx/include/chrono @@ -996,7 +996,9 @@ constexpr chrono::year operator ""y(unsigned lo #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER == 20 # include -# include +# if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +# endif # include #endif diff --git a/libcxx/include/deque b/libcxx/include/deque index aee4764859dd20..4fc994a6e229b8 100644 --- a/libcxx/include/deque +++ 
b/libcxx/include/deque @@ -2531,7 +2531,7 @@ template _LIBCPP_HIDE_FROM_ABI __synth_three_way_result<_Tp> operator<=>(const deque<_Tp, _Allocator>& __x, const deque<_Tp, _Allocator>& __y) { return std::lexicographical_compare_three_way( - __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way<_Tp, _Tp>); + __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); } #endif // _LIBCPP_STD_VER <= 17 diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list index 3731d3f6cf6d16..1ae19d23f88cc8 100644 --- a/libcxx/include/forward_list +++ b/libcxx/include/forward_list @@ -1518,7 +1518,7 @@ template _LIBCPP_HIDE_FROM_ABI __synth_three_way_result<_Tp> operator<=>(const forward_list<_Tp, _Allocator>& __x, const forward_list<_Tp, _Allocator>& __y) { return std::lexicographical_compare_three_way( - __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way<_Tp, _Tp>); + __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); } #endif // #if _LIBCPP_STD_VER <= 17 diff --git a/libcxx/include/list b/libcxx/include/list index 1678559a841dde..929c84de7be449 100644 --- a/libcxx/include/list +++ b/libcxx/include/list @@ -1680,7 +1680,7 @@ template _LIBCPP_HIDE_FROM_ABI __synth_three_way_result<_Tp> operator<=>(const list<_Tp, _Allocator>& __x, const list<_Tp, _Allocator>& __y) { return std::lexicographical_compare_three_way( - __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way<_Tp, _Tp>); + __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); } #endif // _LIBCPP_STD_VER <= 17 diff --git a/libcxx/include/map b/libcxx/include/map index 4b2f3fc71cbfea..02bd17ccb4e8cb 100644 --- a/libcxx/include/map +++ b/libcxx/include/map @@ -1617,12 +1617,7 @@ operator<=(const map<_Key, _Tp, _Compare, _Allocator>& __x, const map<_Key, _Tp, template _LIBCPP_HIDE_FROM_ABI __synth_three_way_result> operator<=>(const map<_Key, _Tp, _Compare, _Allocator>& __x, const map<_Key, _Tp, _Compare, 
_Allocator>& __y) { - return std::lexicographical_compare_three_way( - __x.begin(), - __x.end(), - __y.begin(), - __y.end(), - std::__synth_three_way, pair>); + return std::lexicographical_compare_three_way(__x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); } #endif // #if _LIBCPP_STD_VER <= 17 @@ -2136,12 +2131,7 @@ template _LIBCPP_HIDE_FROM_ABI __synth_three_way_result> operator<=>(const multimap<_Key, _Tp, _Compare, _Allocator>& __x, const multimap<_Key, _Tp, _Compare, _Allocator>& __y) { - return std::lexicographical_compare_three_way( - __x.begin(), - __x.end(), - __y.begin(), - __y.end(), - std::__synth_three_way, pair>); + return std::lexicographical_compare_three_way(__x.begin(), __x.end(), __y.begin(), __y.end(), __synth_three_way); } #endif // #if _LIBCPP_STD_VER <= 17 diff --git a/libcxx/include/set b/libcxx/include/set index 9a2eb12d0a25ab..94533583798699 100644 --- a/libcxx/include/set +++ b/libcxx/include/set @@ -993,8 +993,7 @@ operator<=(const set<_Key, _Compare, _Allocator>& __x, const set<_Key, _Compare, template _LIBCPP_HIDE_FROM_ABI __synth_three_way_result<_Key> operator<=>(const set<_Key, _Allocator>& __x, const set<_Key, _Allocator>& __y) { - return std::lexicographical_compare_three_way( - __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way<_Key, _Key>); + return std::lexicographical_compare_three_way(__x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); } #endif // _LIBCPP_STD_VER <= 17 @@ -1454,7 +1453,7 @@ template _LIBCPP_HIDE_FROM_ABI __synth_three_way_result<_Key> operator<=>(const multiset<_Key, _Allocator>& __x, const multiset<_Key, _Allocator>& __y) { return std::lexicographical_compare_three_way( - __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way<_Key, _Key>); + __x.begin(), __x.end(), __y.begin(), __y.end(), __synth_three_way); } #endif // _LIBCPP_STD_VER <= 17 diff --git a/libcxx/include/vector b/libcxx/include/vector index 
299ad8c9b23f28..aaf51d18fe30fb 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -2903,7 +2903,7 @@ template _LIBCPP_HIDE_FROM_ABI constexpr __synth_three_way_result<_Tp> operator<=>(const vector<_Tp, _Allocator>& __x, const vector<_Tp, _Allocator>& __y) { return std::lexicographical_compare_three_way( - __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way<_Tp, _Tp>); + __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); } #endif // _LIBCPP_STD_VER <= 17 diff --git a/libcxx/include/version b/libcxx/include/version index cac6eaa3b6e880..460af8b6d396c3 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -70,6 +70,7 @@ __cpp_lib_constexpr_functional 201907L __cpp_lib_constexpr_iterator 201811L __cpp_lib_constexpr_memory 202202L 201811L // C++20 +__cpp_lib_constexpr_new 202406L __cpp_lib_constexpr_numeric 201911L __cpp_lib_constexpr_string 201907L __cpp_lib_constexpr_string_view 201811L @@ -125,6 +126,7 @@ __cpp_lib_has_unique_object_representations 201606L __cpp_lib_hazard_pointer 202306L __cpp_lib_hypot 201603L __cpp_lib_incomplete_container_elements 201505L +__cpp_lib_inplace_vector 202406L __cpp_lib_int_pow2 202002L __cpp_lib_integer_comparison_functions 202002L __cpp_lib_integer_sequence 201304L @@ -143,6 +145,7 @@ __cpp_lib_is_null_pointer 201309L __cpp_lib_is_pointer_interconvertible 201907L __cpp_lib_is_scoped_enum 202011L __cpp_lib_is_swappable 201603L +__cpp_lib_is_virtual_base_of 202406L __cpp_lib_is_within_lifetime 202306L __cpp_lib_jthread 201911L __cpp_lib_latch 201907L @@ -170,9 +173,11 @@ __cpp_lib_not_fn 201603L __cpp_lib_null_iterators 201304L __cpp_lib_optional 202110L 201606L // C++17 +__cpp_lib_optional_range_support 202406L __cpp_lib_out_ptr 202311L 202106L // C++23 __cpp_lib_parallel_algorithm 201603L +__cpp_lib_philox_engine 202406L __cpp_lib_polymorphic_allocator 201902L __cpp_lib_print 202207L __cpp_lib_quoted_string_io 201304L @@ -203,6 +208,7 @@ __cpp_lib_sample 201603L 
__cpp_lib_saturation_arithmetic 202311L __cpp_lib_scoped_lock 201703L __cpp_lib_semaphore 201907L +__cpp_lib_senders 202406L __cpp_lib_shared_mutex 201505L __cpp_lib_shared_ptr_arrays 201707L 201611L // C++17 @@ -500,6 +506,7 @@ __cpp_lib_void_t 201411L # undef __cpp_lib_bind_front # define __cpp_lib_bind_front 202306L # define __cpp_lib_bitset 202306L +// # define __cpp_lib_constexpr_new 202406L // # define __cpp_lib_constrained_equality 202403L // # define __cpp_lib_copyable_function 202306L // # define __cpp_lib_debugging 202311L @@ -519,15 +526,20 @@ __cpp_lib_void_t 201411L // # define __cpp_lib_function_ref 202306L // # define __cpp_lib_generate_random 202403L // # define __cpp_lib_hazard_pointer 202306L +// # define __cpp_lib_inplace_vector 202406L +// # define __cpp_lib_is_virtual_base_of 202406L // # define __cpp_lib_is_within_lifetime 202306L // # define __cpp_lib_linalg 202311L +// # define __cpp_lib_optional_range_support 202406L # undef __cpp_lib_out_ptr // # define __cpp_lib_out_ptr 202311L +// # define __cpp_lib_philox_engine 202406L // # define __cpp_lib_ranges_concat 202403L # define __cpp_lib_ratio 202306L // # define __cpp_lib_rcu 202306L # define __cpp_lib_reference_wrapper 202403L # define __cpp_lib_saturation_arithmetic 202311L +// # define __cpp_lib_senders 202406L // # define __cpp_lib_smart_ptr_owner_equality 202306L # define __cpp_lib_span_at 202311L # define __cpp_lib_span_initializer_list 202311L diff --git a/libcxx/modules/std.compat.cppm.in b/libcxx/modules/std.compat.cppm.in index b44dbab25c74b4..0f547a2dc8b715 100644 --- a/libcxx/modules/std.compat.cppm.in +++ b/libcxx/modules/std.compat.cppm.in @@ -69,6 +69,9 @@ module; # if __has_include() # error "please update the header information for in headers_not_available in utils/libcxx/header_information.py" # endif // __has_include() +# if __has_include() +# error "please update the header information for in headers_not_available in utils/libcxx/header_information.py" +# endif // 
__has_include() # if __has_include() # error "please update the header information for in headers_not_available in utils/libcxx/header_information.py" # endif // __has_include() diff --git a/libcxx/modules/std.cppm.in b/libcxx/modules/std.cppm.in index b8d89130aae989..ad8a639b7f71a1 100644 --- a/libcxx/modules/std.cppm.in +++ b/libcxx/modules/std.cppm.in @@ -191,6 +191,9 @@ module; # if __has_include() # error "please update the header information for in headers_not_available in utils/libcxx/header_information.py" # endif // __has_include() +# if __has_include() +# error "please update the header information for in headers_not_available in utils/libcxx/header_information.py" +# endif // __has_include() # if __has_include() # error "please update the header information for in headers_not_available in utils/libcxx/header_information.py" # endif // __has_include() diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index b34f9e614ae0b3..0ae58a10c879c8 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -342,16 +342,16 @@ endif() if (LIBCXX_ENABLE_LOCALIZATION AND LIBCXX_ENABLE_FILESYSTEM AND LIBCXX_ENABLE_TIME_ZONE_DATABASE) list(APPEND LIBCXX_EXPERIMENTAL_SOURCES - include/tzdb/time_zone_private.h - include/tzdb/types_private.h - include/tzdb/tzdb_list_private.h - include/tzdb/tzdb_private.h + experimental/include/tzdb/time_zone_private.h + experimental/include/tzdb/types_private.h + experimental/include/tzdb/tzdb_list_private.h + experimental/include/tzdb/tzdb_private.h # TODO TZDB The exception could be moved in chrono once the TZDB library # is no longer experimental. 
- chrono_exception.cpp - time_zone.cpp - tzdb.cpp - tzdb_list.cpp + experimental/chrono_exception.cpp + experimental/time_zone.cpp + experimental/tzdb.cpp + experimental/tzdb_list.cpp ) endif() diff --git a/libcxx/src/chrono_exception.cpp b/libcxx/src/experimental/chrono_exception.cpp similarity index 100% rename from libcxx/src/chrono_exception.cpp rename to libcxx/src/experimental/chrono_exception.cpp diff --git a/libcxx/src/include/tzdb/time_zone_private.h b/libcxx/src/experimental/include/tzdb/time_zone_private.h similarity index 100% rename from libcxx/src/include/tzdb/time_zone_private.h rename to libcxx/src/experimental/include/tzdb/time_zone_private.h diff --git a/libcxx/src/include/tzdb/types_private.h b/libcxx/src/experimental/include/tzdb/types_private.h similarity index 100% rename from libcxx/src/include/tzdb/types_private.h rename to libcxx/src/experimental/include/tzdb/types_private.h diff --git a/libcxx/src/include/tzdb/tzdb_list_private.h b/libcxx/src/experimental/include/tzdb/tzdb_list_private.h similarity index 100% rename from libcxx/src/include/tzdb/tzdb_list_private.h rename to libcxx/src/experimental/include/tzdb/tzdb_list_private.h diff --git a/libcxx/src/include/tzdb/tzdb_private.h b/libcxx/src/experimental/include/tzdb/tzdb_private.h similarity index 100% rename from libcxx/src/include/tzdb/tzdb_private.h rename to libcxx/src/experimental/include/tzdb/tzdb_private.h diff --git a/libcxx/src/time_zone.cpp b/libcxx/src/experimental/time_zone.cpp similarity index 100% rename from libcxx/src/time_zone.cpp rename to libcxx/src/experimental/time_zone.cpp diff --git a/libcxx/src/tzdb.cpp b/libcxx/src/experimental/tzdb.cpp similarity index 100% rename from libcxx/src/tzdb.cpp rename to libcxx/src/experimental/tzdb.cpp diff --git a/libcxx/src/tzdb_list.cpp b/libcxx/src/experimental/tzdb_list.cpp similarity index 100% rename from libcxx/src/tzdb_list.cpp rename to libcxx/src/experimental/tzdb_list.cpp diff --git 
a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp index e6adda3b02b51c..4c2b483914f47e 100644 --- a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp +++ b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp @@ -14,9 +14,6 @@ // TODO: Investigate these failures which break the CI. // UNSUPPORTED: clang-17, clang-18, clang-19 -// TODO: Investigate this failure on GCC 13 (in Ubuntu Jammy) -// UNSUPPORTED: gcc-13 - // The Android libc++ tests are run on a non-Android host, connected to an // Android device over adb. gdb needs special support to make this work (e.g. // gdbclient.py, ndk-gdb.py, gdbserver), and the Android organization doesn't diff --git a/libcxx/test/libcxx/time/time.zone/time.zone.db/rules.pass.cpp b/libcxx/test/libcxx/time/time.zone/time.zone.db/rules.pass.cpp index 73f4dbd59af9ae..7d9759320c535b 100644 --- a/libcxx/test/libcxx/time/time.zone/time.zone.db/rules.pass.cpp +++ b/libcxx/test/libcxx/time/time.zone/time.zone.db/rules.pass.cpp @@ -17,7 +17,7 @@ // Tests the IANA database rules parsing and operations. // This is not part of the public tzdb interface. // The test uses private implementation headers. -// ADDITIONAL_COMPILE_FLAGS: -I %{libcxx-dir}/src/include +// ADDITIONAL_COMPILE_FLAGS: -I %{libcxx-dir}/src/experimental/include #include #include diff --git a/libcxx/test/libcxx/time/time.zone/time.zone.db/zones.pass.cpp b/libcxx/test/libcxx/time/time.zone/time.zone.db/zones.pass.cpp index 6d436d61357b39..ded89ed808e170 100644 --- a/libcxx/test/libcxx/time/time.zone/time.zone.db/zones.pass.cpp +++ b/libcxx/test/libcxx/time/time.zone/time.zone.db/zones.pass.cpp @@ -17,7 +17,7 @@ // Tests the IANA database zones parsing and operations. // This is not part of the public tzdb interface. // The test uses private implementation headers. 
-// ADDITIONAL_COMPILE_FLAGS: -I %{libcxx-dir}/src/include +// ADDITIONAL_COMPILE_FLAGS: -I %{libcxx-dir}/src/experimental/include #include #include diff --git a/libcxx/test/std/algorithms/robust_against_adl.compile.pass.cpp b/libcxx/test/std/algorithms/robust_against_adl.compile.pass.cpp index 6e6cddfcee2bbf..2d5ece6cb66504 100644 --- a/libcxx/test/std/algorithms/robust_against_adl.compile.pass.cpp +++ b/libcxx/test/std/algorithms/robust_against_adl.compile.pass.cpp @@ -8,11 +8,6 @@ // -// https://buildkite.com/llvm-project/libcxx-ci/builds/15823#0184fc0b-d56b-4774-9e1d-35fe24e09e37 -// It seems like the CI gcc version is buggy. I can't reproduce the failure on my system or on -// godbolt (https://godbolt.org/z/rsPv8e8fn). -// UNSUPPORTED: gcc-13 - #include #include #include diff --git a/libcxx/test/std/containers/views/mdspan/mdspan/index_operator.pass.cpp b/libcxx/test/std/containers/views/mdspan/mdspan/index_operator.pass.cpp index 2b843d6e4eb8ee..ffb10c007222df 100644 --- a/libcxx/test/std/containers/views/mdspan/mdspan/index_operator.pass.cpp +++ b/libcxx/test/std/containers/views/mdspan/mdspan/index_operator.pass.cpp @@ -122,8 +122,8 @@ constexpr void test_layout() { test_iteration(construct_mapping(Layout(), std::extents())); test_iteration(construct_mapping(Layout(), std::extents(1, 1, 1, 1))); -// TODO enable for GCC 13, when the CI pipeline is switched, doesn't work with GCC 12 -#if defined(__clang_major__) && __clang_major__ >= 17 +// TODO(LLVM 20): Enable this once AppleClang is upgraded +#ifndef TEST_COMPILER_APPLE_CLANG int data[1]; // Check operator constraint for number of arguments static_assert(check_operator_constraints(std::mdspan(data, construct_mapping(Layout(), std::extents(1))), 0)); @@ -216,7 +216,7 @@ constexpr void test_layout() { assert(!check_operator_constraints(std::mdspan(data, construct_mapping(Layout(), std::extents(1))), s)); } } -#endif +#endif // TEST_COMPILER_APPLE_CLANG } template diff --git 
a/libcxx/test/std/language.support/support.limits/support.limits.general/execution.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/execution.version.compile.pass.cpp index 047248573de408..d3c6d7b158ebaf 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/execution.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/execution.version.compile.pass.cpp @@ -18,6 +18,7 @@ /* Constant Value __cpp_lib_execution 201603L [C++17] 201902L [C++20] + __cpp_lib_senders 202406L [C++26] */ #include @@ -29,12 +30,20 @@ # error "__cpp_lib_execution should not be defined before c++17" # endif +# ifdef __cpp_lib_senders +# error "__cpp_lib_senders should not be defined before c++26" +# endif + #elif TEST_STD_VER == 14 # ifdef __cpp_lib_execution # error "__cpp_lib_execution should not be defined before c++17" # endif +# ifdef __cpp_lib_senders +# error "__cpp_lib_senders should not be defined before c++26" +# endif + #elif TEST_STD_VER == 17 # if !defined(_LIBCPP_VERSION) @@ -50,6 +59,10 @@ # endif # endif +# ifdef __cpp_lib_senders +# error "__cpp_lib_senders should not be defined before c++26" +# endif + #elif TEST_STD_VER == 20 # if !defined(_LIBCPP_VERSION) @@ -65,6 +78,10 @@ # endif # endif +# ifdef __cpp_lib_senders +# error "__cpp_lib_senders should not be defined before c++26" +# endif + #elif TEST_STD_VER == 23 # if !defined(_LIBCPP_VERSION) @@ -80,6 +97,10 @@ # endif # endif +# ifdef __cpp_lib_senders +# error "__cpp_lib_senders should not be defined before c++26" +# endif + #elif TEST_STD_VER > 23 # if !defined(_LIBCPP_VERSION) @@ -95,5 +116,18 @@ # endif # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_senders +# error "__cpp_lib_senders should be defined in c++26" +# endif +# if __cpp_lib_senders != 202406L +# error "__cpp_lib_senders should have the value 202406L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef 
__cpp_lib_senders +# error "__cpp_lib_senders should not be defined because it is unimplemented in libc++!" +# endif +# endif + #endif // TEST_STD_VER > 23 diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/new.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/new.version.compile.pass.cpp index feb88bfb37e89b..17c1bd71eee675 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/new.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/new.version.compile.pass.cpp @@ -16,6 +16,7 @@ // Test the feature test macros defined by /* Constant Value + __cpp_lib_constexpr_new 202406L [C++26] __cpp_lib_destroying_delete 201806L [C++20] __cpp_lib_hardware_interference_size 201703L [C++17] __cpp_lib_launder 201606L [C++17] @@ -26,6 +27,10 @@ #if TEST_STD_VER < 14 +# ifdef __cpp_lib_constexpr_new +# error "__cpp_lib_constexpr_new should not be defined before c++26" +# endif + # ifdef __cpp_lib_destroying_delete # error "__cpp_lib_destroying_delete should not be defined before c++20" # endif @@ -40,6 +45,10 @@ #elif TEST_STD_VER == 14 +# ifdef __cpp_lib_constexpr_new +# error "__cpp_lib_constexpr_new should not be defined before c++26" +# endif + # ifdef __cpp_lib_destroying_delete # error "__cpp_lib_destroying_delete should not be defined before c++20" # endif @@ -54,6 +63,10 @@ #elif TEST_STD_VER == 17 +# ifdef __cpp_lib_constexpr_new +# error "__cpp_lib_constexpr_new should not be defined before c++26" +# endif + # ifdef __cpp_lib_destroying_delete # error "__cpp_lib_destroying_delete should not be defined before c++20" # endif @@ -80,6 +93,10 @@ #elif TEST_STD_VER == 20 +# ifdef __cpp_lib_constexpr_new +# error "__cpp_lib_constexpr_new should not be defined before c++26" +# endif + # if TEST_STD_VER > 17 && defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L # ifndef 
__cpp_lib_destroying_delete # error "__cpp_lib_destroying_delete should be defined in c++20" @@ -115,6 +132,10 @@ #elif TEST_STD_VER == 23 +# ifdef __cpp_lib_constexpr_new +# error "__cpp_lib_constexpr_new should not be defined before c++26" +# endif + # if TEST_STD_VER > 17 && defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L # ifndef __cpp_lib_destroying_delete # error "__cpp_lib_destroying_delete should be defined in c++23" @@ -150,6 +171,19 @@ #elif TEST_STD_VER > 23 +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_constexpr_new +# error "__cpp_lib_constexpr_new should be defined in c++26" +# endif +# if __cpp_lib_constexpr_new != 202406L +# error "__cpp_lib_constexpr_new should have the value 202406L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_constexpr_new +# error "__cpp_lib_constexpr_new should not be defined because it is unimplemented in libc++!" +# endif +# endif + # if TEST_STD_VER > 17 && defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L # ifndef __cpp_lib_destroying_delete # error "__cpp_lib_destroying_delete should be defined in c++26" diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp index 15350a949e1685..f265be091f79b5 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp @@ -15,11 +15,12 @@ // Test the feature test macros defined by -/* Constant Value - __cpp_lib_constrained_equality 202403L [C++26] - __cpp_lib_freestanding_optional 202311L [C++26] - __cpp_lib_optional 201606L [C++17] - 202110L [C++23] +/* Constant Value + __cpp_lib_constrained_equality 202403L [C++26] + __cpp_lib_freestanding_optional 202311L [C++26] + 
__cpp_lib_optional 201606L [C++17] + 202110L [C++23] + __cpp_lib_optional_range_support 202406L [C++26] */ #include @@ -39,6 +40,10 @@ # error "__cpp_lib_optional should not be defined before c++17" # endif +# ifdef __cpp_lib_optional_range_support +# error "__cpp_lib_optional_range_support should not be defined before c++26" +# endif + #elif TEST_STD_VER == 14 # ifdef __cpp_lib_constrained_equality @@ -53,6 +58,10 @@ # error "__cpp_lib_optional should not be defined before c++17" # endif +# ifdef __cpp_lib_optional_range_support +# error "__cpp_lib_optional_range_support should not be defined before c++26" +# endif + #elif TEST_STD_VER == 17 # ifdef __cpp_lib_constrained_equality @@ -70,6 +79,10 @@ # error "__cpp_lib_optional should have the value 201606L in c++17" # endif +# ifdef __cpp_lib_optional_range_support +# error "__cpp_lib_optional_range_support should not be defined before c++26" +# endif + #elif TEST_STD_VER == 20 # ifdef __cpp_lib_constrained_equality @@ -87,6 +100,10 @@ # error "__cpp_lib_optional should have the value 201606L in c++20" # endif +# ifdef __cpp_lib_optional_range_support +# error "__cpp_lib_optional_range_support should not be defined before c++26" +# endif + #elif TEST_STD_VER == 23 # ifdef __cpp_lib_constrained_equality @@ -104,6 +121,10 @@ # error "__cpp_lib_optional should have the value 202110L in c++23" # endif +# ifdef __cpp_lib_optional_range_support +# error "__cpp_lib_optional_range_support should not be defined before c++26" +# endif + #elif TEST_STD_VER > 23 # if !defined(_LIBCPP_VERSION) @@ -139,5 +160,18 @@ # error "__cpp_lib_optional should have the value 202110L in c++26" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_optional_range_support +# error "__cpp_lib_optional_range_support should be defined in c++26" +# endif +# if __cpp_lib_optional_range_support != 202406L +# error "__cpp_lib_optional_range_support should have the value 202406L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef 
__cpp_lib_optional_range_support +# error "__cpp_lib_optional_range_support should not be defined because it is unimplemented in libc++!" +# endif +# endif + #endif // TEST_STD_VER > 23 diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/random.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/random.version.compile.pass.cpp index 1f138d948cfce0..2e36c73172cd4d 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/random.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/random.version.compile.pass.cpp @@ -17,6 +17,7 @@ /* Constant Value __cpp_lib_generate_random 202403L [C++26] + __cpp_lib_philox_engine 202406L [C++26] */ #include @@ -28,30 +29,50 @@ # error "__cpp_lib_generate_random should not be defined before c++26" # endif +# ifdef __cpp_lib_philox_engine +# error "__cpp_lib_philox_engine should not be defined before c++26" +# endif + #elif TEST_STD_VER == 14 # ifdef __cpp_lib_generate_random # error "__cpp_lib_generate_random should not be defined before c++26" # endif +# ifdef __cpp_lib_philox_engine +# error "__cpp_lib_philox_engine should not be defined before c++26" +# endif + #elif TEST_STD_VER == 17 # ifdef __cpp_lib_generate_random # error "__cpp_lib_generate_random should not be defined before c++26" # endif +# ifdef __cpp_lib_philox_engine +# error "__cpp_lib_philox_engine should not be defined before c++26" +# endif + #elif TEST_STD_VER == 20 # ifdef __cpp_lib_generate_random # error "__cpp_lib_generate_random should not be defined before c++26" # endif +# ifdef __cpp_lib_philox_engine +# error "__cpp_lib_philox_engine should not be defined before c++26" +# endif + #elif TEST_STD_VER == 23 # ifdef __cpp_lib_generate_random # error "__cpp_lib_generate_random should not be defined before c++26" # endif +# ifdef __cpp_lib_philox_engine +# error "__cpp_lib_philox_engine should not be defined 
before c++26" +# endif + #elif TEST_STD_VER > 23 # if !defined(_LIBCPP_VERSION) @@ -67,5 +88,18 @@ # endif # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_philox_engine +# error "__cpp_lib_philox_engine should be defined in c++26" +# endif +# if __cpp_lib_philox_engine != 202406L +# error "__cpp_lib_philox_engine should have the value 202406L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_philox_engine +# error "__cpp_lib_philox_engine should not be defined because it is unimplemented in libc++!" +# endif +# endif + #endif // TEST_STD_VER > 23 diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.compile.pass.cpp index 98277f832f5a04..bb69ca7368aafa 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.compile.pass.cpp @@ -30,6 +30,7 @@ __cpp_lib_is_pointer_interconvertible 201907L [C++20] __cpp_lib_is_scoped_enum 202011L [C++23] __cpp_lib_is_swappable 201603L [C++17] + __cpp_lib_is_virtual_base_of 202406L [C++26] __cpp_lib_is_within_lifetime 202306L [C++26] __cpp_lib_logical_traits 201510L [C++17] __cpp_lib_reference_from_temporary 202202L [C++23] @@ -102,6 +103,10 @@ # error "__cpp_lib_is_swappable should not be defined before c++17" # endif +# ifdef __cpp_lib_is_virtual_base_of +# error "__cpp_lib_is_virtual_base_of should not be defined before c++26" +# endif + # ifdef __cpp_lib_is_within_lifetime # error "__cpp_lib_is_within_lifetime should not be defined before c++26" # endif @@ -205,6 +210,10 @@ # error "__cpp_lib_is_swappable should not be defined before c++17" # endif +# ifdef __cpp_lib_is_virtual_base_of +# error "__cpp_lib_is_virtual_base_of should not be defined before c++26" +# endif + # ifdef 
__cpp_lib_is_within_lifetime # error "__cpp_lib_is_within_lifetime should not be defined before c++26" # endif @@ -329,6 +338,10 @@ # error "__cpp_lib_is_swappable should have the value 201603L in c++17" # endif +# ifdef __cpp_lib_is_virtual_base_of +# error "__cpp_lib_is_virtual_base_of should not be defined before c++26" +# endif + # ifdef __cpp_lib_is_within_lifetime # error "__cpp_lib_is_within_lifetime should not be defined before c++26" # endif @@ -489,6 +502,10 @@ # error "__cpp_lib_is_swappable should have the value 201603L in c++20" # endif +# ifdef __cpp_lib_is_virtual_base_of +# error "__cpp_lib_is_virtual_base_of should not be defined before c++26" +# endif + # ifdef __cpp_lib_is_within_lifetime # error "__cpp_lib_is_within_lifetime should not be defined before c++26" # endif @@ -658,6 +675,10 @@ # error "__cpp_lib_is_swappable should have the value 201603L in c++23" # endif +# ifdef __cpp_lib_is_virtual_base_of +# error "__cpp_lib_is_virtual_base_of should not be defined before c++26" +# endif + # ifdef __cpp_lib_is_within_lifetime # error "__cpp_lib_is_within_lifetime should not be defined before c++26" # endif @@ -836,6 +857,19 @@ # error "__cpp_lib_is_swappable should have the value 201603L in c++26" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_is_virtual_base_of +# error "__cpp_lib_is_virtual_base_of should be defined in c++26" +# endif +# if __cpp_lib_is_virtual_base_of != 202406L +# error "__cpp_lib_is_virtual_base_of should have the value 202406L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_is_virtual_base_of +# error "__cpp_lib_is_virtual_base_of should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_is_within_lifetime # error "__cpp_lib_is_within_lifetime should be defined in c++26" diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp index 32ed30e21cbbe3..9a5a10a8fb0f5a 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp @@ -65,6 +65,7 @@ __cpp_lib_constexpr_iterator 201811L [C++20] __cpp_lib_constexpr_memory 201811L [C++20] 202202L [C++23] + __cpp_lib_constexpr_new 202406L [C++26] __cpp_lib_constexpr_numeric 201911L [C++20] __cpp_lib_constexpr_string 201907L [C++20] __cpp_lib_constexpr_string_view 201811L [C++20] @@ -112,6 +113,7 @@ __cpp_lib_hazard_pointer 202306L [C++26] __cpp_lib_hypot 201603L [C++17] __cpp_lib_incomplete_container_elements 201505L [C++17] + __cpp_lib_inplace_vector 202406L [C++26] __cpp_lib_int_pow2 202002L [C++20] __cpp_lib_integer_comparison_functions 202002L [C++20] __cpp_lib_integer_sequence 201304L [C++14] @@ -130,6 +132,7 @@ __cpp_lib_is_pointer_interconvertible 201907L [C++20] __cpp_lib_is_scoped_enum 202011L [C++23] __cpp_lib_is_swappable 201603L [C++17] + __cpp_lib_is_virtual_base_of 202406L [C++26] __cpp_lib_is_within_lifetime 202306L [C++26] __cpp_lib_jthread 201911L [C++20] __cpp_lib_latch 201907L [C++20] @@ -154,9 +157,11 @@ __cpp_lib_null_iterators 201304L [C++14] __cpp_lib_optional 201606L [C++17] 202110L [C++23] + __cpp_lib_optional_range_support 202406L [C++26] __cpp_lib_out_ptr 202106L [C++23] 202311L [C++26] __cpp_lib_parallel_algorithm 201603L [C++17] + __cpp_lib_philox_engine 202406L [C++26] __cpp_lib_polymorphic_allocator 201902L [C++20] __cpp_lib_print 202207L [C++23] __cpp_lib_quoted_string_io 201304L 
[C++14] @@ -186,6 +191,7 @@ __cpp_lib_saturation_arithmetic 202311L [C++26] __cpp_lib_scoped_lock 201703L [C++17] __cpp_lib_semaphore 201907L [C++20] + __cpp_lib_senders 202406L [C++26] __cpp_lib_shared_mutex 201505L [C++17] __cpp_lib_shared_ptr_arrays 201611L [C++17] 201707L [C++20] @@ -425,6 +431,10 @@ # error "__cpp_lib_constexpr_memory should not be defined before c++20" # endif +# ifdef __cpp_lib_constexpr_new +# error "__cpp_lib_constexpr_new should not be defined before c++26" +# endif + # ifdef __cpp_lib_constexpr_numeric # error "__cpp_lib_constexpr_numeric should not be defined before c++20" # endif @@ -609,6 +619,10 @@ # error "__cpp_lib_incomplete_container_elements should not be defined before c++17" # endif +# ifdef __cpp_lib_inplace_vector +# error "__cpp_lib_inplace_vector should not be defined before c++26" +# endif + # ifdef __cpp_lib_int_pow2 # error "__cpp_lib_int_pow2 should not be defined before c++20" # endif @@ -681,6 +695,10 @@ # error "__cpp_lib_is_swappable should not be defined before c++17" # endif +# ifdef __cpp_lib_is_virtual_base_of +# error "__cpp_lib_is_virtual_base_of should not be defined before c++26" +# endif + # ifdef __cpp_lib_is_within_lifetime # error "__cpp_lib_is_within_lifetime should not be defined before c++26" # endif @@ -773,6 +791,10 @@ # error "__cpp_lib_optional should not be defined before c++17" # endif +# ifdef __cpp_lib_optional_range_support +# error "__cpp_lib_optional_range_support should not be defined before c++26" +# endif + # ifdef __cpp_lib_out_ptr # error "__cpp_lib_out_ptr should not be defined before c++23" # endif @@ -781,6 +803,10 @@ # error "__cpp_lib_parallel_algorithm should not be defined before c++17" # endif +# ifdef __cpp_lib_philox_engine +# error "__cpp_lib_philox_engine should not be defined before c++26" +# endif + # ifdef __cpp_lib_polymorphic_allocator # error "__cpp_lib_polymorphic_allocator should not be defined before c++20" # endif @@ -897,6 +923,10 @@ # error "__cpp_lib_semaphore 
should not be defined before c++20" # endif +# ifdef __cpp_lib_senders +# error "__cpp_lib_senders should not be defined before c++26" +# endif + # ifdef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should not be defined before c++17" # endif @@ -1261,6 +1291,10 @@ # error "__cpp_lib_constexpr_memory should not be defined before c++20" # endif +# ifdef __cpp_lib_constexpr_new +# error "__cpp_lib_constexpr_new should not be defined before c++26" +# endif + # ifdef __cpp_lib_constexpr_numeric # error "__cpp_lib_constexpr_numeric should not be defined before c++20" # endif @@ -1451,6 +1485,10 @@ # error "__cpp_lib_incomplete_container_elements should not be defined before c++17" # endif +# ifdef __cpp_lib_inplace_vector +# error "__cpp_lib_inplace_vector should not be defined before c++26" +# endif + # ifdef __cpp_lib_int_pow2 # error "__cpp_lib_int_pow2 should not be defined before c++20" # endif @@ -1535,6 +1573,10 @@ # error "__cpp_lib_is_swappable should not be defined before c++17" # endif +# ifdef __cpp_lib_is_virtual_base_of +# error "__cpp_lib_is_virtual_base_of should not be defined before c++26" +# endif + # ifdef __cpp_lib_is_within_lifetime # error "__cpp_lib_is_within_lifetime should not be defined before c++26" # endif @@ -1636,6 +1678,10 @@ # error "__cpp_lib_optional should not be defined before c++17" # endif +# ifdef __cpp_lib_optional_range_support +# error "__cpp_lib_optional_range_support should not be defined before c++26" +# endif + # ifdef __cpp_lib_out_ptr # error "__cpp_lib_out_ptr should not be defined before c++23" # endif @@ -1644,6 +1690,10 @@ # error "__cpp_lib_parallel_algorithm should not be defined before c++17" # endif +# ifdef __cpp_lib_philox_engine +# error "__cpp_lib_philox_engine should not be defined before c++26" +# endif + # ifdef __cpp_lib_polymorphic_allocator # error "__cpp_lib_polymorphic_allocator should not be defined before c++20" # endif @@ -1775,6 +1825,10 @@ # error "__cpp_lib_semaphore should not be 
defined before c++20" # endif +# ifdef __cpp_lib_senders +# error "__cpp_lib_senders should not be defined before c++26" +# endif + # ifdef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should not be defined before c++17" # endif @@ -2199,6 +2253,10 @@ # error "__cpp_lib_constexpr_memory should not be defined before c++20" # endif +# ifdef __cpp_lib_constexpr_new +# error "__cpp_lib_constexpr_new should not be defined before c++26" +# endif + # ifdef __cpp_lib_constexpr_numeric # error "__cpp_lib_constexpr_numeric should not be defined before c++20" # endif @@ -2431,6 +2489,10 @@ # error "__cpp_lib_incomplete_container_elements should have the value 201505L in c++17" # endif +# ifdef __cpp_lib_inplace_vector +# error "__cpp_lib_inplace_vector should not be defined before c++26" +# endif + # ifdef __cpp_lib_int_pow2 # error "__cpp_lib_int_pow2 should not be defined before c++20" # endif @@ -2527,6 +2589,10 @@ # error "__cpp_lib_is_swappable should have the value 201603L in c++17" # endif +# ifdef __cpp_lib_is_virtual_base_of +# error "__cpp_lib_is_virtual_base_of should not be defined before c++26" +# endif + # ifdef __cpp_lib_is_within_lifetime # error "__cpp_lib_is_within_lifetime should not be defined before c++26" # endif @@ -2670,6 +2736,10 @@ # error "__cpp_lib_optional should have the value 201606L in c++17" # endif +# ifdef __cpp_lib_optional_range_support +# error "__cpp_lib_optional_range_support should not be defined before c++26" +# endif + # ifdef __cpp_lib_out_ptr # error "__cpp_lib_out_ptr should not be defined before c++23" # endif @@ -2687,6 +2757,10 @@ # endif # endif +# ifdef __cpp_lib_philox_engine +# error "__cpp_lib_philox_engine should not be defined before c++26" +# endif + # ifdef __cpp_lib_polymorphic_allocator # error "__cpp_lib_polymorphic_allocator should not be defined before c++20" # endif @@ -2833,6 +2907,10 @@ # error "__cpp_lib_semaphore should not be defined before c++20" # endif +# ifdef __cpp_lib_senders +# error 
"__cpp_lib_senders should not be defined before c++26" +# endif + # if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++17" @@ -3398,6 +3476,10 @@ # error "__cpp_lib_constexpr_memory should have the value 201811L in c++20" # endif +# ifdef __cpp_lib_constexpr_new +# error "__cpp_lib_constexpr_new should not be defined before c++26" +# endif + # ifndef __cpp_lib_constexpr_numeric # error "__cpp_lib_constexpr_numeric should be defined in c++20" # endif @@ -3681,6 +3763,10 @@ # error "__cpp_lib_incomplete_container_elements should have the value 201505L in c++20" # endif +# ifdef __cpp_lib_inplace_vector +# error "__cpp_lib_inplace_vector should not be defined before c++26" +# endif + # ifndef __cpp_lib_int_pow2 # error "__cpp_lib_int_pow2 should be defined in c++20" # endif @@ -3810,6 +3896,10 @@ # error "__cpp_lib_is_swappable should have the value 201603L in c++20" # endif +# ifdef __cpp_lib_is_virtual_base_of +# error "__cpp_lib_is_virtual_base_of should not be defined before c++26" +# endif + # ifdef __cpp_lib_is_within_lifetime # error "__cpp_lib_is_within_lifetime should not be defined before c++26" # endif @@ -3980,6 +4070,10 @@ # error "__cpp_lib_optional should have the value 201606L in c++20" # endif +# ifdef __cpp_lib_optional_range_support +# error "__cpp_lib_optional_range_support should not be defined before c++26" +# endif + # ifdef __cpp_lib_out_ptr # error "__cpp_lib_out_ptr should not be defined before c++23" # endif @@ -3997,6 +4091,10 @@ # endif # endif +# ifdef __cpp_lib_philox_engine +# error "__cpp_lib_philox_engine should not be defined before c++26" +# endif + # if !defined(_LIBCPP_VERSION) || _LIBCPP_AVAILABILITY_HAS_PMR # ifndef __cpp_lib_polymorphic_allocator # error "__cpp_lib_polymorphic_allocator should be defined in c++20" @@ -4167,6 +4265,10 @@ # endif # endif +# ifdef __cpp_lib_senders +# error "__cpp_lib_senders should not be defined before c++26" +# endif + # 
if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++20" @@ -4816,6 +4918,10 @@ # error "__cpp_lib_constexpr_memory should have the value 202202L in c++23" # endif +# ifdef __cpp_lib_constexpr_new +# error "__cpp_lib_constexpr_new should not be defined before c++26" +# endif + # ifndef __cpp_lib_constexpr_numeric # error "__cpp_lib_constexpr_numeric should be defined in c++23" # endif @@ -5123,6 +5229,10 @@ # error "__cpp_lib_incomplete_container_elements should have the value 201505L in c++23" # endif +# ifdef __cpp_lib_inplace_vector +# error "__cpp_lib_inplace_vector should not be defined before c++26" +# endif + # ifndef __cpp_lib_int_pow2 # error "__cpp_lib_int_pow2 should be defined in c++23" # endif @@ -5261,6 +5371,10 @@ # error "__cpp_lib_is_swappable should have the value 201603L in c++23" # endif +# ifdef __cpp_lib_is_virtual_base_of +# error "__cpp_lib_is_virtual_base_of should not be defined before c++26" +# endif + # ifdef __cpp_lib_is_within_lifetime # error "__cpp_lib_is_within_lifetime should not be defined before c++26" # endif @@ -5446,6 +5560,10 @@ # error "__cpp_lib_optional should have the value 202110L in c++23" # endif +# ifdef __cpp_lib_optional_range_support +# error "__cpp_lib_optional_range_support should not be defined before c++26" +# endif + # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_out_ptr # error "__cpp_lib_out_ptr should be defined in c++23" @@ -5472,6 +5590,10 @@ # endif # endif +# ifdef __cpp_lib_philox_engine +# error "__cpp_lib_philox_engine should not be defined before c++26" +# endif + # if !defined(_LIBCPP_VERSION) || _LIBCPP_AVAILABILITY_HAS_PMR # ifndef __cpp_lib_polymorphic_allocator # error "__cpp_lib_polymorphic_allocator should be defined in c++23" @@ -5726,6 +5848,10 @@ # endif # endif +# ifdef __cpp_lib_senders +# error "__cpp_lib_senders should not be defined before c++26" +# endif + # if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef 
__cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++23" @@ -6438,6 +6564,19 @@ # error "__cpp_lib_constexpr_memory should have the value 202202L in c++26" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_constexpr_new +# error "__cpp_lib_constexpr_new should be defined in c++26" +# endif +# if __cpp_lib_constexpr_new != 202406L +# error "__cpp_lib_constexpr_new should have the value 202406L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_constexpr_new +# error "__cpp_lib_constexpr_new should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_constexpr_numeric # error "__cpp_lib_constexpr_numeric should be defined in c++26" # endif @@ -6898,6 +7037,19 @@ # error "__cpp_lib_incomplete_container_elements should have the value 201505L in c++26" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_inplace_vector +# error "__cpp_lib_inplace_vector should be defined in c++26" +# endif +# if __cpp_lib_inplace_vector != 202406L +# error "__cpp_lib_inplace_vector should have the value 202406L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_inplace_vector +# error "__cpp_lib_inplace_vector should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_int_pow2 # error "__cpp_lib_int_pow2 should be defined in c++26" # endif @@ -7036,6 +7188,19 @@ # error "__cpp_lib_is_swappable should have the value 201603L in c++26" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_is_virtual_base_of +# error "__cpp_lib_is_virtual_base_of should be defined in c++26" +# endif +# if __cpp_lib_is_virtual_base_of != 202406L +# error "__cpp_lib_is_virtual_base_of should have the value 202406L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_is_virtual_base_of +# error "__cpp_lib_is_virtual_base_of should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_is_within_lifetime # error "__cpp_lib_is_within_lifetime should be defined in c++26" @@ -7239,6 +7404,19 @@ # error "__cpp_lib_optional should have the value 202110L in c++26" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_optional_range_support +# error "__cpp_lib_optional_range_support should be defined in c++26" +# endif +# if __cpp_lib_optional_range_support != 202406L +# error "__cpp_lib_optional_range_support should have the value 202406L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_optional_range_support +# error "__cpp_lib_optional_range_support should not be defined because it is unimplemented in libc++!" +# endif +# endif + # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_out_ptr # error "__cpp_lib_out_ptr should be defined in c++26" @@ -7265,6 +7443,19 @@ # endif # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_philox_engine +# error "__cpp_lib_philox_engine should be defined in c++26" +# endif +# if __cpp_lib_philox_engine != 202406L +# error "__cpp_lib_philox_engine should have the value 202406L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_philox_engine +# error "__cpp_lib_philox_engine should not be defined because it is unimplemented in libc++!" +# endif +# endif + # if !defined(_LIBCPP_VERSION) || _LIBCPP_AVAILABILITY_HAS_PMR # ifndef __cpp_lib_polymorphic_allocator # error "__cpp_lib_polymorphic_allocator should be defined in c++26" @@ -7546,6 +7737,19 @@ # endif # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_senders +# error "__cpp_lib_senders should be defined in c++26" +# endif +# if __cpp_lib_senders != 202406L +# error "__cpp_lib_senders should have the value 202406L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_senders +# error "__cpp_lib_senders should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + # if !defined(_LIBCPP_HAS_NO_THREADS) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++26" diff --git a/libcxx/test/std/numerics/complex.number/complex/bit_cast.pass.cpp b/libcxx/test/std/numerics/complex.number/complex/bit_cast.pass.cpp new file mode 100644 index 00000000000000..c35d4d6c632955 --- /dev/null +++ b/libcxx/test/std/numerics/complex.number/complex/bit_cast.pass.cpp @@ -0,0 +1,36 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 + +// Make sure that std::bit_cast works with std::complex. Test case extracted from +// https://github.com/llvm/llvm-project/issues/94620. 
+ +#include +#include + +template +constexpr void test() { + using Complex = std::complex; + unsigned char data[sizeof(Complex)] = {0}; + + [[maybe_unused]] Complex c = std::bit_cast(data); +} + +constexpr bool test_all() { + test(); + test(); + test(); + return true; +} + +int main(int, char**) { + test_all(); + static_assert(test_all()); + return 0; +} diff --git a/libcxx/test/std/time/time.zone/time.zone.leap/nonmembers/comparison.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.leap/nonmembers/comparison.pass.cpp index ccff0248ebac6d..d48745346bf651 100644 --- a/libcxx/test/std/time/time.zone/time.zone.leap/nonmembers/comparison.pass.cpp +++ b/libcxx/test/std/time/time.zone/time.zone.leap/nonmembers/comparison.pass.cpp @@ -10,7 +10,7 @@ // UNSUPPORTED: no-filesystem, no-localization, no-tzdb // TODO TZDB investigate why this fails with GCC -// UNSUPPORTED: gcc-13, gcc-14 +// UNSUPPORTED: gcc-14 // XFAIL: libcpp-has-no-experimental-tzdb // XFAIL: availability-tzdb-missing diff --git a/libcxx/test/std/utilities/expected/expected.expected/monadic/transform.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/monadic/transform.pass.cpp index aa7106fb91ada3..4fb21374aebe31 100644 --- a/libcxx/test/std/utilities/expected/expected.expected/monadic/transform.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.expected/monadic/transform.pass.cpp @@ -10,7 +10,7 @@ // GCC has a issue for `Guaranteed copy elision for potentially-overlapping non-static data members`, // please refer to: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108333 -// XFAIL: gcc-13, gcc-14 +// XFAIL: gcc-14 // diff --git a/libcxx/test/std/utilities/expected/expected.expected/monadic/transform_error.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/monadic/transform_error.pass.cpp index ae9feccb58cfa2..d35788d9fef25e 100644 --- a/libcxx/test/std/utilities/expected/expected.expected/monadic/transform_error.pass.cpp +++ 
b/libcxx/test/std/utilities/expected/expected.expected/monadic/transform_error.pass.cpp @@ -10,7 +10,7 @@ // GCC has a issue for `Guaranteed copy elision for potentially-overlapping non-static data members`, // please refer to: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108333. -// XFAIL: gcc-13, gcc-14 +// XFAIL: gcc-14 // diff --git a/libcxx/test/std/utilities/expected/expected.void/monadic/transform_error.pass.cpp b/libcxx/test/std/utilities/expected/expected.void/monadic/transform_error.pass.cpp index f70bddbed02052..f6d3011d1ea96e 100644 --- a/libcxx/test/std/utilities/expected/expected.void/monadic/transform_error.pass.cpp +++ b/libcxx/test/std/utilities/expected/expected.void/monadic/transform_error.pass.cpp @@ -10,7 +10,7 @@ // GCC has a issue for `Guaranteed copy elision for potentially-overlapping non-static data members`, // please refer to: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108333 -// XFAIL: gcc-13, gcc-14 +// XFAIL: gcc-14 // diff --git a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp index cad13c1efecaba..23dcc0b7bad9b2 100644 --- a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp +++ b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp @@ -7,7 +7,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // TODO FMT __builtin_memcpy isn't constexpr in GCC -// UNSUPPORTED: gcc-13, gcc-14 +// UNSUPPORTED: gcc-14 // diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/ctor.pair_U_V_move.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/ctor.pair_U_V_move.pass.cpp index 3b2d093eb34d49..8ba9b5696eff13 100644 --- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/ctor.pair_U_V_move.pass.cpp +++ 
b/libcxx/test/std/utilities/utility/pairs/pairs.pair/ctor.pair_U_V_move.pass.cpp @@ -121,7 +121,28 @@ int main(int, char**) test_pair_rv(); test_pair_rv(); - test_pair_rv(); + /* For ExplicitTypes::CopyOnly, two of the viable candidates for initializing from a non-const xvalue are: + * pair(const pair&); // (defaulted copy constructor) + * template explicit pair(const pair&&); [U1 = ExplicitTypes::CopyOnly, U2 = int] + * + * This results in diverging behavior for test_convertible which uses copy-list-initialization. + * Prior to CWG2137, this would have selected the first (non-explicit) ctor as explicit ctors + * would not be considered. Afterwards, it should select the second since it is a better match, + * and then failed because it is explicit. + * + * This may change with future defect reports, and some compilers only have partial support + * for CWG2137, so use std::is_convertible directly to avoid a copy-list-initialization + */ + { + using P1 = std::pair; + using P2 = std::pair; + using UP1 = std::pair&&; + using UP2 = std::pair&&; + static_assert(std::is_constructible::value, ""); + static_assert(std::is_convertible::value, ""); + static_assert(std::is_constructible::value, ""); + static_assert(std::is_convertible::value, ""); + } test_pair_rv(); test_pair_rv(); diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 3f8ecc26321ee1..d79892e140ebd4 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -360,6 +360,12 @@ def add_version_header(tc): "values": {"c++20": 201811, "c++23": 202202}, "headers": ["memory"], }, + { + "name": "__cpp_lib_constexpr_new", + "values": {"c++26": 202406}, # P2747R2 constexpr placement new + "headers": ["new"], + "unimplemented": True, + }, { "name": "__cpp_lib_constexpr_numeric", "values": {"c++20": 201911}, @@ -679,6 +685,12 @@ def add_version_header(tc): "values": {"c++17": 
201505}, "headers": ["forward_list", "list", "vector"], }, + { + "name": "__cpp_lib_inplace_vector", + "values": {"c++26": 202406}, # P0843R14 inplace_vector + "headers": ["inplace_vector"], + "unimplemented": True, + }, { "name": "__cpp_lib_int_pow2", "values": {"c++20": 202002}, @@ -771,6 +783,14 @@ def add_version_header(tc): "values": {"c++17": 201603}, "headers": ["type_traits"], }, + { + "name": "__cpp_lib_is_virtual_base_of", + "values": { + "c++26": 202406 # P2985R0 A type trait for detecting virtual base classes + }, + "headers": ["type_traits"], + "unimplemented": True, + }, { "name": "__cpp_lib_is_within_lifetime", # Note this name was changed from "__cpp_lib_within_lifetime" when the paper was adopted @@ -851,7 +871,10 @@ def add_version_header(tc): }, { "name": "__cpp_lib_mdspan", - "values": {"c++23": 202207}, + "values": { + "c++23": 202207, + # "c++26": 202406, # P2389R2 dextents Index Type Parameter + }, "headers": ["mdspan"], }, { @@ -918,6 +941,12 @@ def add_version_header(tc): "values": {"c++17": 201606, "c++23": 202110}, "headers": ["optional"], }, + { + "name": "__cpp_lib_optional_range_support", + "values": {"c++26": 202406}, # P3168R2 Give std::optional Range Support + "headers": ["optional"], + "unimplemented": True, + }, { "name": "__cpp_lib_out_ptr", "values": { @@ -933,6 +962,15 @@ def add_version_header(tc): "headers": ["algorithm", "numeric"], "unimplemented": True, }, + { + "name": "__cpp_lib_philox_engine", + "values": { + "c++26": 202406 + }, # P2075R6 Philox as an extension of the C++ RNG engines + # Note the paper mentions 202310L as value, which differs from the typical procedure. 
+ "headers": ["random"], + "unimplemented": True, + }, { "name": "__cpp_lib_polymorphic_allocator", "values": {"c++20": 201902}, @@ -945,6 +983,7 @@ def add_version_header(tc): "values": { "c++23": 202207, # "c++26": 202403, # P3107R5: Permit an efficient implementation of std::print + # "c++26": 202406, # P3235R3 std::print more types faster with less memory }, "headers": ["ostream", "print"], }, @@ -957,7 +996,10 @@ def add_version_header(tc): }, { "name": "__cpp_lib_ranges", - "values": {"c++20": 202207}, + "values": { + "c++20": 202207, + # "c++26": 202406, # P2997R1 Removing the common reference requirement from the indirectly invocable concepts + }, "headers": ["algorithm", "functional", "iterator", "memory", "ranges"], }, { @@ -1103,6 +1145,12 @@ def add_version_header(tc): "test_suite_guard": "!defined(_LIBCPP_HAS_NO_THREADS) && (!defined(_LIBCPP_VERSION) || _LIBCPP_AVAILABILITY_HAS_SYNC)", "libcxx_guard": "!defined(_LIBCPP_HAS_NO_THREADS) && _LIBCPP_AVAILABILITY_HAS_SYNC", }, + { + "name": "__cpp_lib_senders", + "values": {"c++26": 202406}, # P2300R10 std::execution + "headers": ["execution"], + "unimplemented": True, + }, { "name": "__cpp_lib_shared_mutex", "values": {"c++17": 201505}, diff --git a/libcxx/utils/libcxx/header_information.py b/libcxx/utils/libcxx/header_information.py index e2165d6ab80b01..166c9a77c08e70 100644 --- a/libcxx/utils/libcxx/header_information.py +++ b/libcxx/utils/libcxx/header_information.py @@ -143,6 +143,7 @@ "flat_set", "generator", "hazard_pointer", + "inplace_vector", "linalg", "rcu", "spanstream", diff --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py index ab6c84fe7326be..5e708da4f8fbe1 100644 --- a/libcxx/utils/libcxx/test/features.py +++ b/libcxx/utils/libcxx/test/features.py @@ -587,7 +587,7 @@ def check_gdb(cfg): Feature( name="_target-has-llvm-17", when=lambda cfg: BooleanExpression.evaluate( - "target={{.+}}-apple-macosx{{14.[4-9](.0)?}}", + 
"target={{.+}}-apple-macosx{{14.[4-9](.0)?}} || target={{.+}}-apple-macosx{{1[5-9]([.].+)?}}", cfg.available_features, ), ), diff --git a/libcxxabi/src/cxa_exception_storage.cpp b/libcxxabi/src/cxa_exception_storage.cpp index 2479f550e09eff..c842da195accbd 100644 --- a/libcxxabi/src/cxa_exception_storage.cpp +++ b/libcxxabi/src/cxa_exception_storage.cpp @@ -24,7 +24,7 @@ extern "C" { } // extern "C" } // namespace __cxxabiv1 -#elif defined(HAS_THREAD_LOCAL) +#elif __has_feature(cxx_thread_local) namespace __cxxabiv1 { namespace { diff --git a/libcxxabi/test/catch_member_function_pointer_02.pass.cpp b/libcxxabi/test/catch_member_function_pointer_02.pass.cpp index 112e78779b4720..5d702031ce352f 100644 --- a/libcxxabi/test/catch_member_function_pointer_02.pass.cpp +++ b/libcxxabi/test/catch_member_function_pointer_02.pass.cpp @@ -12,7 +12,7 @@ // GCC supports noexcept function types but this test still fails. // This is likely a bug in their implementation. Investigation needed. -// XFAIL: gcc-13, gcc-14 +// XFAIL: gcc-14 #include diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 281567e372298b..07a1b63be80510 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -1084,10 +1084,94 @@ static void mergeArch(RISCVISAUtils::OrderedExtensionMap &mergedExts, } } +static void mergeAtomic(DenseMap::iterator it, + const InputSectionBase *oldSection, + const InputSectionBase *newSection, + RISCVAttrs::RISCVAtomicAbiTag oldTag, + RISCVAttrs::RISCVAtomicAbiTag newTag) { + using RISCVAttrs::RISCVAtomicAbiTag; + // Same tags stay the same, and UNKNOWN is compatible with anything + if (oldTag == newTag || newTag == RISCVAtomicAbiTag::UNKNOWN) + return; + + auto reportAbiError = [&]() { + errorOrWarn("atomic abi mismatch for " + oldSection->name + "\n>>> " + + toString(oldSection) + + ": atomic_abi=" + Twine(static_cast(oldTag)) + + "\n>>> " + toString(newSection) + + ": atomic_abi=" + Twine(static_cast(newTag))); + }; + + auto reportUnknownAbiError = 
[](const InputSectionBase *section, + RISCVAtomicAbiTag tag) { + switch (tag) { + case RISCVAtomicAbiTag::UNKNOWN: + case RISCVAtomicAbiTag::A6C: + case RISCVAtomicAbiTag::A6S: + case RISCVAtomicAbiTag::A7: + return; + }; + errorOrWarn("unknown atomic abi for " + section->name + "\n>>> " + + toString(section) + + ": atomic_abi=" + Twine(static_cast(tag))); + }; + switch (oldTag) { + case RISCVAtomicAbiTag::UNKNOWN: + it->getSecond() = static_cast(newTag); + return; + case RISCVAtomicAbiTag::A6C: + switch (newTag) { + case RISCVAtomicAbiTag::A6S: + it->getSecond() = static_cast(RISCVAtomicAbiTag::A6C); + return; + case RISCVAtomicAbiTag::A7: + reportAbiError(); + return; + case RISCVAttrs::RISCVAtomicAbiTag::UNKNOWN: + case RISCVAttrs::RISCVAtomicAbiTag::A6C: + return; + }; + + case RISCVAtomicAbiTag::A6S: + switch (newTag) { + case RISCVAtomicAbiTag::A6C: + it->getSecond() = static_cast(RISCVAtomicAbiTag::A6C); + return; + case RISCVAtomicAbiTag::A7: + it->getSecond() = static_cast(RISCVAtomicAbiTag::A7); + return; + case RISCVAttrs::RISCVAtomicAbiTag::UNKNOWN: + case RISCVAttrs::RISCVAtomicAbiTag::A6S: + return; + }; + + case RISCVAtomicAbiTag::A7: + switch (newTag) { + case RISCVAtomicAbiTag::A6S: + it->getSecond() = static_cast(RISCVAtomicAbiTag::A7); + return; + case RISCVAtomicAbiTag::A6C: + reportAbiError(); + return; + case RISCVAttrs::RISCVAtomicAbiTag::UNKNOWN: + case RISCVAttrs::RISCVAtomicAbiTag::A7: + return; + }; + }; + + // If we get here, then we have an invalid tag, so report it. + // Putting these checks at the end allows us to only do these checks when we + // need to, since this is expected to be a rare occurrence. 
+ reportUnknownAbiError(oldSection, oldTag); + reportUnknownAbiError(newSection, newTag); +} + static RISCVAttributesSection * mergeAttributesSection(const SmallVector §ions) { + using RISCVAttrs::RISCVAtomicAbiTag; RISCVISAUtils::OrderedExtensionMap exts; const InputSectionBase *firstStackAlign = nullptr; + const InputSectionBase *firstAtomicAbi = nullptr; unsigned firstStackAlignValue = 0, xlen = 0; bool hasArch = false; @@ -1136,7 +1220,15 @@ mergeAttributesSection(const SmallVector §ions) { break; case RISCVAttrs::AttrType::ATOMIC_ABI: - // TODO: Handle ATOMIC_ABI tag merging + if (auto i = parser.getAttributeValue(tag.attr)) { + auto r = merged.intAttr.try_emplace(tag.attr, *i); + if (r.second) + firstAtomicAbi = sec; + else + mergeAtomic(r.first, firstAtomicAbi, sec, + static_cast(r.first->getSecond()), + static_cast(*i)); + } continue; } diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 7800c2919a2bd0..a4863d6717efb4 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -631,7 +631,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { // of Libtool. We cannot convince every software developer to migrate to // the latest version and re-generate scripts. So we have this hack. if (args.hasArg(OPT_v) || args.hasArg(OPT_version)) - message(getLLDVersion() + ", compatible with GNU linkers"); + message(getLLDVersion() + " (compatible with GNU linkers)"); if (const char *path = getReproduceOption(args)) { // Note that --reproduce is a debug option so you can ignore it diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index 0cee1a84d0b55a..b40a812f30bd3b 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -1725,7 +1725,10 @@ DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella, } // Initialize symbols. - exportingFile = isImplicitlyLinked(installName) ? 
this : this->umbrella; + bool canBeImplicitlyLinked = findCommand(hdr, LC_SUB_CLIENT) == nullptr; + exportingFile = (canBeImplicitlyLinked && isImplicitlyLinked(installName)) + ? this + : this->umbrella; const auto *dyldInfo = findCommand(hdr, LC_DYLD_INFO_ONLY); const auto *exportsTrie = @@ -1884,7 +1887,10 @@ DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella, checkAppExtensionSafety(interface.isApplicationExtensionSafe()); - exportingFile = isImplicitlyLinked(installName) ? this : umbrella; + bool canBeImplicitlyLinked = interface.allowableClients().size() == 0; + exportingFile = (canBeImplicitlyLinked && isImplicitlyLinked(installName)) + ? this + : umbrella; auto addSymbol = [&](const llvm::MachO::Symbol &symbol, const Twine &name) -> void { StringRef savedName = saver().save(name); diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 12ea6de0fc15c4..c95170c1165b22 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -26,15 +26,21 @@ Non-comprehensive list of changes in this release ELF Improvements ---------------- +* ``EI_OSABI`` in the output is now inferred from input object files. + (`#97144 `_) * ``--compress-sections ={none,zlib,zstd}[:level]`` is added to compress matched output sections without the ``SHF_ALLOC`` flag. (`#84855 `_) (`#90567 `_) * The default compression level for zlib is now independent of linker optimization level (``Z_BEST_SPEED``). +* zstd compression parallelism no longer requires ``ZSTD_MULITHREAD`` build. * ``GNU_PROPERTY_AARCH64_FEATURE_PAUTH`` notes, ``R_AARCH64_AUTH_ABS64`` and ``R_AARCH64_AUTH_RELATIVE`` relocations are now supported. (`#72714 `_) +* ``--no-allow-shlib-undefined`` now rejects non-exported definitions in the + ``def-hidden.so ref.so`` case. + (`#86777 `_) * ``--debug-names`` is added to create a merged ``.debug_names`` index from input ``.debug_names`` sections. Type units are not handled yet. 
(`#86508 `_) @@ -44,6 +50,9 @@ ELF Improvements (typical for embedded). It also makes full LTO feasible in such cases, since IR merging currently prevents the linker script from referring to input files. (`#90007 `_) +* ``--default-script`/``-dT`` is implemented to specify a default script that is processed + if ``--script``/``-T`` is not specified. + (`#89327 `_) * ``--force-group-allocation`` is implemented to discard ``SHT_GROUP`` sections and combine relocation sections if their relocated section group members are placed to the same output section. @@ -51,6 +60,30 @@ ELF Improvements * ``--build-id`` now defaults to generating a 20-byte digest ("sha1") instead of 8-byte ("fast"). This improves compatibility with RPM packaging tools. (`#93943 `_) +* ``-z lrodata-after-bss`` is implemented to place ``.lrodata`` after ``.bss``. + (`#81224 `_) +* ``--export-dynamic`` no longer creates dynamic sections for ``-no-pie`` static linking. +* ``--lto-emit-asm`` is now added as the canonical spelling of ``--plugin-opt=emit-llvm``. +* ``--lto-emit-llvm`` now uses the pre-codegen module. + (`#97480 `_) +* When AArch64 PAuth is enabled, ``-z pack-relative-relocs`` now encodes ``R_AARCH64_AUTH_RELATIVE`` relocations in ``.rela.auth.dyn``. + (`#96496 `_) +* ``-z gcs`` and ``-z gcs-report`` are now supported for AArch64 Guarded Control Stack extension. +* ``-r`` now forces ``-Bstatic``. +* Thumb2 PLT is now supported for Cortex-M processors. + (`#93644 `_) +* ``DW_EH_sdata4`` of addresses larger than 0x80000000 is now supported for MIPS32. + (`#92438 `_) +* Certain unknown section types are rejected. + (`#85173 `_) +* ``PROVIDE(lhs = rhs) PROVIDE(rhs = ...)``, ``lhs`` is now defined only if ``rhs`` is needed. + (`#74771 `_) + (`#87530 `_) +* Orphan placement is refined to prefer the last similar section when its rank <= orphan's rank. + (`#94099 `_) + Non-alloc orphan sections are now placed at the end. 
+ (`#94519 `_) +* R_X86_64_REX_GOTPCRELX of the addq form is no longer incorrectly optimized when the address is larger than 0x80000000. Breaking changes ---------------- diff --git a/lld/test/ELF/riscv-attributes.s b/lld/test/ELF/riscv-attributes.s index 68534d0fb6b75c..057223c18418e1 100644 --- a/lld/test/ELF/riscv-attributes.s +++ b/lld/test/ELF/riscv-attributes.s @@ -44,6 +44,46 @@ # RUN: not ld.lld a.o b.o c.o diff_stack_align.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=STACK_ALIGN --implicit-check-not=error: # STACK_ALIGN: error: diff_stack_align.o:(.riscv.attributes) has stack_align=32 but a.o:(.riscv.attributes) has stack_align=16 +## RISC-V tag merging for atomic_abi values A6C and A7 lead to an error. +# RUN: llvm-mc -filetype=obj -triple=riscv64 atomic_abi_A6C.s -o atomic_abi_A6C.o +# RUN: llvm-mc -filetype=obj -triple=riscv64 atomic_abi_A7.s -o atomic_abi_A7.o +# RUN: not ld.lld atomic_abi_A6C.o atomic_abi_A7.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=ATOMIC_ABI_ERROR --implicit-check-not=error: +# ATOMIC_ABI_ERROR: error: atomic abi mismatch for .riscv.attributes +# ATOMIC_ABI_ERROR-NEXT: >>> atomic_abi_A6C.o:(.riscv.attributes): atomic_abi=1 +# ATOMIC_ABI_ERROR-NEXT: >>> atomic_abi_A7.o:(.riscv.attributes): atomic_abi=3 + +## RISC-V tag merging for atomic_abi values A6C and invalid lead to an error. 
+# RUN: llvm-mc -filetype=obj -triple=riscv64 atomic_abi_invalid.s -o atomic_abi_invalid.o +# RUN: not ld.lld atomic_abi_A6C.o atomic_abi_invalid.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=ATOMIC_ABI_INVALID --implicit-check-not=error: +# ATOMIC_ABI_INVALID: error: unknown atomic abi for .riscv.attributes +# ATOMIC_ABI_INVALID-NEXT: >>> atomic_abi_invalid.o:(.riscv.attributes): atomic_abi=42 + +# RUN: llvm-mc -filetype=obj -triple=riscv64 atomic_abi_A6S.s -o atomic_abi_A6S.o +# RUN: ld.lld atomic_abi_A6S.o atomic_abi_A6C.o -o atomic_abi_A6C_A6S +# RUN: llvm-readobj -A atomic_abi_A6C_A6S | FileCheck %s --check-prefix=A6C_A6S + +# RUN: ld.lld atomic_abi_A6S.o atomic_abi_A7.o -o atomic_abi_A6S_A7 +# RUN: llvm-readobj -A atomic_abi_A6S_A7 | FileCheck %s --check-prefix=A6S_A7 + +# RUN: llvm-mc -filetype=obj -triple=riscv64 atomic_abi_unknown.s -o atomic_abi_unknown.o +# RUN: ld.lld atomic_abi_unknown.o atomic_abi_A6C.o -o atomic_abi_A6C_unknown +# RUN: llvm-readobj -A atomic_abi_A6C_unknown | FileCheck %s --check-prefixes=UNKNOWN_A6C + +# RUN: ld.lld atomic_abi_unknown.o diff_stack_align.o -o atomic_abi_none_unknown +# RUN: llvm-readobj -A atomic_abi_none_unknown | FileCheck %s --check-prefixes=UNKNOWN_NONE + +# RUN: ld.lld diff_stack_align.o atomic_abi_A6C.o -o atomic_abi_A6C_none +# RUN: llvm-readobj -A atomic_abi_A6C_none | FileCheck %s --check-prefixes=NONE_A6C + +# RUN: ld.lld atomic_abi_unknown.o atomic_abi_A6S.o -o atomic_abi_A6S_unknown +# RUN: llvm-readobj -A atomic_abi_A6S_unknown | FileCheck %s --check-prefix=UNKNOWN_A6S + +# RUN: ld.lld atomic_abi_unknown.o atomic_abi_A7.o -o atomic_abi_A7_unknown +# RUN: llvm-readobj -A atomic_abi_A7_unknown | FileCheck %s --check-prefix=UNKNOWN_A7 + +# RUN: ld.lld diff_stack_align.o atomic_abi_A7.o -o atomic_abi_A7_none +# RUN: llvm-readobj -A atomic_abi_A7_none | FileCheck %s --check-prefix=NONE_A7 + ## The deprecated priv_spec is not handled as GNU ld does. 
## Differing priv_spec attributes lead to an absent attribute. # RUN: llvm-mc -filetype=obj -triple=riscv64 diff_priv_spec.s -o diff_priv_spec.o @@ -286,6 +326,178 @@ .attribute priv_spec, 3 .attribute priv_spec_minor, 3 +#--- atomic_abi_unknown.s +.attribute atomic_abi, 0 + +#--- atomic_abi_A6C.s +.attribute atomic_abi, 1 + +#--- atomic_abi_A6S.s +.attribute atomic_abi, 2 + +#--- atomic_abi_A7.s +.attribute atomic_abi, 3 + +#--- atomic_abi_invalid.s +.attribute atomic_abi, 42 + +# UNKNOWN_NONE: BuildAttributes { +# UNKNOWN_NONE-NEXT: FormatVersion: 0x41 +# UNKNOWN_NONE-NEXT: Section 1 { +# UNKNOWN_NONE-NEXT: SectionLength: 17 +# UNKNOWN_NONE-NEXT: Vendor: riscv +# UNKNOWN_NONE-NEXT: Tag: Tag_File (0x1) +# UNKNOWN_NONE-NEXT: Size: 7 +# UNKNOWN_NONE-NEXT: FileAttributes { +# UNKNOWN_NONE-NEXT: Attribute { +# UNKNOWN_NONE-NEXT: Tag: 4 +# UNKNOWN_NONE-NEXT: Value: 32 +# UNKNOWN_NONE-NEXT: TagName: stack_align +# UNKNOWN_NONE-NEXT: Description: Stack alignment is 32-bytes +# UNKNOWN_NONE-NEXT: } +# UNKNOWN_NONE-NEXT: } +# UNKNOWN_NONE-NEXT: } +# UNKNOWN_NONE-NEXT: } + +# NONE_A6C: BuildAttributes { +# NONE_A6C-NEXT: FormatVersion: 0x41 +# NONE_A6C-NEXT: Section 1 { +# NONE_A6C-NEXT: SectionLength: 19 +# NONE_A6C-NEXT: Vendor: riscv +# NONE_A6C-NEXT: Tag: Tag_File (0x1) +# NONE_A6C-NEXT: Size: 9 +# NONE_A6C-NEXT: FileAttributes { +# NONE_A6C-NEXT: Attribute { +# NONE_A6C-NEXT: Tag: 14 +# NONE_A6C-NEXT: Value: 1 +# NONE_A6C-NEXT: TagName: atomic_abi +# NONE_A6C-NEXT: Description: Atomic ABI is 1 +# NONE_A6C-NEXT: } +# NONE_A6C-NEXT: Attribute { +# NONE_A6C-NEXT: Tag: 4 +# NONE_A6C-NEXT: Value: 32 +# NONE_A6C-NEXT: TagName: stack_align +# NONE_A6C-NEXT: Description: Stack alignment is 32-bytes +# NONE_A6C-NEXT: } +# NONE_A6C-NEXT: } +# NONE_A6C-NEXT: } +# NONE_A6C-NEXT: } + +# UNKNOWN_A6C: BuildAttributes { +# UNKNOWN_A6C-NEXT: FormatVersion: 0x41 +# UNKNOWN_A6C-NEXT: Section 1 { +# UNKNOWN_A6C-NEXT: SectionLength: 17 +# UNKNOWN_A6C-NEXT: Vendor: riscv +# 
UNKNOWN_A6C-NEXT: Tag: Tag_File (0x1) +# UNKNOWN_A6C-NEXT: Size: 7 +# UNKNOWN_A6C-NEXT: FileAttributes { +# UNKNOWN_A6C-NEXT: Attribute { +# UNKNOWN_A6C-NEXT: Tag: 14 +# UNKNOWN_A6C-NEXT: Value: 1 +# UNKNOWN_A6C-NEXT: TagName: atomic_abi +# UNKNOWN_A6C-NEXT: Description: Atomic ABI is 1 +# UNKNOWN_A6C-NEXT: } +# UNKNOWN_A6C-NEXT: } +# UNKNOWN_A6C-NEXT: } +# UNKNOWN_A6C-NEXT: } + +# UNKNOWN_A6S: BuildAttributes { +# UNKNOWN_A6S-NEXT: FormatVersion: 0x41 +# UNKNOWN_A6S-NEXT: Section 1 { +# UNKNOWN_A6S-NEXT: SectionLength: +# UNKNOWN_A6S-NEXT: Vendor: riscv +# UNKNOWN_A6S-NEXT: Tag: Tag_File (0x1) +# UNKNOWN_A6S-NEXT: Size: 7 +# UNKNOWN_A6S-NEXT: FileAttributes { +# UNKNOWN_A6S-NEXT: Attribute { +# UNKNOWN_A6S-NEXT: Tag: 14 +# UNKNOWN_A6S-NEXT: Value: 2 +# UNKNOWN_A6S-NEXT: TagName: atomic_abi +# UNKNOWN_A6S-NEXT: Description: Atomic ABI is 2 +# UNKNOWN_A6S-NEXT: } +# UNKNOWN_A6S-NEXT: } +# UNKNOWN_A6S-NEXT: } +# UNKNOWN_A6S-NEXT: } + +# NONE_A7: BuildAttributes { +# NONE_A7-NEXT: FormatVersion: 0x41 +# NONE_A7-NEXT: Section 1 { +# NONE_A7-NEXT: SectionLength: 19 +# NONE_A7-NEXT: Vendor: riscv +# NONE_A7-NEXT: Tag: Tag_File (0x1) +# NONE_A7-NEXT: Size: 9 +# NONE_A7-NEXT: FileAttributes { +# NONE_A7-NEXT: Attribute { +# NONE_A7-NEXT: Tag: 14 +# NONE_A7-NEXT: Value: 3 +# NONE_A7-NEXT: TagName: atomic_abi +# NONE_A7-NEXT: Description: Atomic ABI is 3 +# NONE_A7-NEXT: } +# NONE_A7-NEXT: Attribute { +# NONE_A7-NEXT: Tag: 4 +# NONE_A7-NEXT: Value: 32 +# NONE_A7-NEXT: TagName: stack_align +# NONE_A7-NEXT: Description: Stack alignment is 32-bytes +# NONE_A7-NEXT: } +# NONE_A7-NEXT: } +# NONE_A7-NEXT: } +# NONE_A7-NEXT: } + + +# UNKNOWN_A7: BuildAttributes { +# UNKNOWN_A7-NEXT: FormatVersion: 0x41 +# UNKNOWN_A7-NEXT: Section 1 { +# UNKNOWN_A7-NEXT: SectionLength: 17 +# UNKNOWN_A7-NEXT: Vendor: riscv +# UNKNOWN_A7-NEXT: Tag: Tag_File (0x1) +# UNKNOWN_A7-NEXT: Size: 7 +# UNKNOWN_A7-NEXT: FileAttributes { +# UNKNOWN_A7-NEXT: Attribute { +# UNKNOWN_A7-NEXT: Tag: 14 +# 
UNKNOWN_A7-NEXT: Value: 3 +# UNKNOWN_A7-NEXT: TagName: atomic_abi +# UNKNOWN_A7-NEXT: Description: Atomic ABI is 3 +# UNKNOWN_A7-NEXT: } +# UNKNOWN_A7-NEXT: } +# UNKNOWN_A7-NEXT: } +# UNKNOWN_A7-NEXT: } + +# A6C_A6S: BuildAttributes { +# A6C_A6S-NEXT: FormatVersion: 0x41 +# A6C_A6S-NEXT: Section 1 { +# A6C_A6S-NEXT: SectionLength: 17 +# A6C_A6S-NEXT: Vendor: riscv +# A6C_A6S-NEXT: Tag: Tag_File (0x1) +# A6C_A6S-NEXT: Size: 7 +# A6C_A6S-NEXT: FileAttributes { +# A6C_A6S-NEXT: Attribute { +# A6C_A6S-NEXT: Tag: 14 +# A6C_A6S-NEXT: Value: 1 +# A6C_A6S-NEXT: TagName: atomic_abi +# A6C_A6S-NEXT: Description: Atomic ABI is 1 +# A6C_A6S-NEXT: } +# A6C_A6S-NEXT: } +# A6C_A6S-NEXT: } +# A6C_A6S-NEXT: } + +# A6S_A7: BuildAttributes { +# A6S_A7-NEXT: FormatVersion: 0x41 +# A6S_A7-NEXT: Section 1 { +# A6S_A7-NEXT: SectionLength: 17 +# A6S_A7-NEXT: Vendor: riscv +# A6S_A7-NEXT: Tag: Tag_File (0x1) +# A6S_A7-NEXT: Size: 7 +# A6S_A7-NEXT: FileAttributes { +# A6S_A7-NEXT: Attribute { +# A6S_A7-NEXT: Tag: 14 +# A6S_A7-NEXT: Value: 3 +# A6S_A7-NEXT: TagName: atomic_abi +# A6S_A7-NEXT: Description: Atomic ABI is 3 +# A6S_A7-NEXT: } +# A6S_A7-NEXT: } +# A6S_A7-NEXT: } +# A6S_A7-NEXT: } + #--- unknown13.s .attribute 13, "0" #--- unknown13a.s diff --git a/lld/test/ELF/version.test b/lld/test/ELF/version.test index 383c1ac976d962..72bd2ab56a126f 100644 --- a/lld/test/ELF/version.test +++ b/lld/test/ELF/version.test @@ -7,4 +7,4 @@ # RUN: ld.lld -V 2>&1 | FileCheck %s # RUN: not ld.lld -V %t/not-exist 2>&1 | FileCheck %s -# CHECK: LLD {{.*}}, compatible with GNU linkers +# CHECK: LLD {{.+}} (compatible with GNU linkers) diff --git a/lld/test/MachO/implicit-and-allowable-clients.test b/lld/test/MachO/implicit-and-allowable-clients.test new file mode 100644 index 00000000000000..576db33af2ea03 --- /dev/null +++ b/lld/test/MachO/implicit-and-allowable-clients.test @@ -0,0 +1,48 @@ +# REQUIRES: aarch64 +# RUN: rm -rf %t; split-file %s %t +# RUN: ln -s Versions/A/FrameworkPublic.tbd 
%t/System/Library/Frameworks/FrameworkPublic.framework/ +# RUN: ln -s Versions/A/FrameworkPrivate.tbd %t/System/Library/Frameworks/FrameworkPrivate.framework/ +# RUN: llvm-mc -filetype obj -triple arm64-apple-macos11.0 %t/test.s -o %t/test.o +# RUN: %lld -arch arm64 -platform_version macos 11.0 11.0 -o %t/test -syslibroot %t -framework FrameworkPublic %t/test.o + +# RUN: llvm-objdump --bind --no-show-raw-insn -d %t/test | FileCheck %s +# CHECK: Bind table: +# CHECK-DAG: __DATA __data {{.*}} pointer 0 FrameworkPublic _funcPublic +# CHECK-DAG: __DATA __data {{.*}} pointer 0 FrameworkPublic _funcPrivate + +#--- System/Library/Frameworks/FrameworkPublic.framework/Versions/A/FrameworkPublic.tbd +--- !tapi-tbd +tbd-version: 4 +targets: [ arm64-macos ] +install-name: '/System/Library/Frameworks/FrameworkPublic.framework/Versions/A/FrameworkPublic' +current-version: 1.0.0 +reexported-libraries: + - targets: [ arm64-macos ] + libraries: [ '/System/Library/Frameworks/FrameworkPrivate.framework/Versions/A/FrameworkPrivate' ] +exports: + - targets: [ arm64-macos ] + symbols: [ '_funcPublic' ] +... +#--- System/Library/Frameworks/FrameworkPrivate.framework/Versions/A/FrameworkPrivate.tbd +--- !tapi-tbd +tbd-version: 4 +targets: [ arm64-macos ] +install-name: '/System/Library/Frameworks/FrameworkPrivate.framework/Versions/A/FrameworkPrivate' +current-version: 1.0.0 +allowable-clients: + - targets: [ arm64-macos ] + clients: [ FrameworkPublic ] +exports: + - targets: [ arm64-macos ] + symbols: [ '_funcPrivate' ] +... 
+#--- test.s +.text +.globl _main + +_main: + ret + +.data + .quad _funcPublic + .quad _funcPrivate diff --git a/lldb/examples/synthetic/gnu_libstdcpp.py b/lldb/examples/synthetic/gnu_libstdcpp.py index f778065aaca377..d98495b8a9df38 100644 --- a/lldb/examples/synthetic/gnu_libstdcpp.py +++ b/lldb/examples/synthetic/gnu_libstdcpp.py @@ -914,12 +914,15 @@ def get_variant_npos_value(index_byte_size): if index == npos_value: return " No Value" + # Strip references and typedefs. + variant_type = raw_obj.GetType().GetCanonicalType().GetDereferencedType() + template_arg_count = variant_type.GetNumberOfTemplateArguments() + # Invalid index can happen when the variant is not initialized yet. - template_arg_count = data_obj.GetType().GetNumberOfTemplateArguments() if index >= template_arg_count: return " " - active_type = data_obj.GetType().GetTemplateArgumentType(index) + active_type = variant_type.GetTemplateArgumentType(index) return f" Active Type = {active_type.GetDisplayTypeName()} " diff --git a/lldb/include/lldb/Symbol/ObjectFile.h b/lldb/include/lldb/Symbol/ObjectFile.h index 6348d8103f85de..8592323322e383 100644 --- a/lldb/include/lldb/Symbol/ObjectFile.h +++ b/lldb/include/lldb/Symbol/ObjectFile.h @@ -178,6 +178,7 @@ class ObjectFile : public std::enable_shared_from_this, lldb::offset_t file_offset, lldb::offset_t file_size, lldb_private::ModuleSpecList &specs); + static bool IsObjectFile(lldb_private::FileSpec file_spec); /// Split a path into a file path with object name. 
/// /// For paths like "/tmp/foo.a(bar.o)" we often need to split a path up into diff --git a/lldb/include/lldb/Utility/Listener.h b/lldb/include/lldb/Utility/Listener.h index daa7deb345f301..d48816ec0ea4d8 100644 --- a/lldb/include/lldb/Utility/Listener.h +++ b/lldb/include/lldb/Utility/Listener.h @@ -94,8 +94,6 @@ class Listener : public std::enable_shared_from_this { size_t HandleBroadcastEvent(lldb::EventSP &event_sp); - void SetShadow(bool is_shadow) { m_is_shadow = is_shadow; } - private: // Classes that inherit from Listener can see and modify these struct BroadcasterInfo { @@ -127,12 +125,11 @@ class Listener : public std::enable_shared_from_this { std::string m_name; broadcaster_collection m_broadcasters; - std::recursive_mutex m_broadcasters_mutex; // Protects m_broadcasters + std::mutex m_broadcasters_mutex; // Protects m_broadcasters event_collection m_events; std::mutex m_events_mutex; // Protects m_broadcasters and m_events std::condition_variable m_events_condition; broadcaster_manager_collection m_broadcaster_managers; - bool m_is_shadow = false; void BroadcasterWillDestruct(Broadcaster *); diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/lldbgdbserverutils.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/lldbgdbserverutils.py index 5bd352d3ac5492..94376a16d39f6a 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/lldbgdbserverutils.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/lldbgdbserverutils.py @@ -1042,7 +1042,7 @@ def __init__(self): class Pipe(object): def __init__(self, prefix): while True: - self.name = "lldb-" + str(random.randrange(1e10)) + self.name = "lldb-" + str(random.randrange(10**10)) full_name = "\\\\.\\pipe\\" + self.name self._handle = CreateNamedPipe( full_name, diff --git a/lldb/source/API/SBAttachInfo.cpp b/lldb/source/API/SBAttachInfo.cpp index 8ce1f1d65c4964..a9f712c79c7fe0 100644 --- a/lldb/source/API/SBAttachInfo.cpp +++ b/lldb/source/API/SBAttachInfo.cpp @@ 
-266,13 +266,7 @@ SBListener SBAttachInfo::GetShadowListener() { void SBAttachInfo::SetShadowListener(SBListener &listener) { LLDB_INSTRUMENT_VA(this, listener); - ListenerSP listener_sp = listener.GetSP(); - if (listener_sp && listener.IsValid()) - listener_sp->SetShadow(true); - else - listener_sp = nullptr; - - m_opaque_sp->SetShadowListener(listener_sp); + m_opaque_sp->SetShadowListener(listener.GetSP()); } const char *SBAttachInfo::GetScriptedProcessClassName() const { diff --git a/lldb/source/API/SBLaunchInfo.cpp b/lldb/source/API/SBLaunchInfo.cpp index d5f935083e6c1e..d6b52e8a67a49e 100644 --- a/lldb/source/API/SBLaunchInfo.cpp +++ b/lldb/source/API/SBLaunchInfo.cpp @@ -402,11 +402,5 @@ SBListener SBLaunchInfo::GetShadowListener() { void SBLaunchInfo::SetShadowListener(SBListener &listener) { LLDB_INSTRUMENT_VA(this, listener); - ListenerSP listener_sp = listener.GetSP(); - if (listener_sp && listener.IsValid()) - listener_sp->SetShadow(true); - else - listener_sp = nullptr; - - m_opaque_sp->SetShadowListener(listener_sp); + m_opaque_sp->SetShadowListener(listener.GetSP()); } diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp index 05cfa0568c25d4..feaa51a96843ab 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp @@ -202,155 +202,6 @@ bool lldb_private::formatters::LibcxxUniquePointerSummaryProvider( return true; } -lldb_private::formatters::LibCxxUnorderedMapIteratorSyntheticFrontEnd:: - LibCxxUnorderedMapIteratorSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp) - : SyntheticChildrenFrontEnd(*valobj_sp) { - if (valobj_sp) - Update(); -} - -lldb::ChildCacheState lldb_private::formatters:: - LibCxxUnorderedMapIteratorSyntheticFrontEnd::Update() { - m_pair_sp.reset(); - m_iter_ptr = nullptr; - - ValueObjectSP valobj_sp = m_backend.GetSP(); - if (!valobj_sp) - return lldb::ChildCacheState::eRefetch; - - TargetSP 
target_sp(valobj_sp->GetTargetSP()); - - if (!target_sp) - return lldb::ChildCacheState::eRefetch; - - if (!valobj_sp) - return lldb::ChildCacheState::eRefetch; - - auto exprPathOptions = ValueObject::GetValueForExpressionPathOptions() - .DontCheckDotVsArrowSyntax() - .SetSyntheticChildrenTraversal( - ValueObject::GetValueForExpressionPathOptions:: - SyntheticChildrenTraversal::None); - - // This must be a ValueObject* because it is a child of the ValueObject we - // are producing children for it if were a ValueObjectSP, we would end up - // with a loop (iterator -> synthetic -> child -> parent == iterator) and - // that would in turn leak memory by never allowing the ValueObjects to die - // and free their memory. - m_iter_ptr = - valobj_sp - ->GetValueForExpressionPath(".__i_.__node_", nullptr, nullptr, - exprPathOptions, nullptr) - .get(); - - if (m_iter_ptr) { - auto iter_child(valobj_sp->GetChildMemberWithName("__i_")); - if (!iter_child) { - m_iter_ptr = nullptr; - return lldb::ChildCacheState::eRefetch; - } - - CompilerType node_type(iter_child->GetCompilerType() - .GetTypeTemplateArgument(0) - .GetPointeeType()); - - CompilerType pair_type(node_type.GetTypeTemplateArgument(0)); - - std::string name; - uint64_t bit_offset_ptr; - uint32_t bitfield_bit_size_ptr; - bool is_bitfield_ptr; - - pair_type = pair_type.GetFieldAtIndex( - 0, name, &bit_offset_ptr, &bitfield_bit_size_ptr, &is_bitfield_ptr); - if (!pair_type) { - m_iter_ptr = nullptr; - return lldb::ChildCacheState::eRefetch; - } - - uint64_t addr = m_iter_ptr->GetValueAsUnsigned(LLDB_INVALID_ADDRESS); - m_iter_ptr = nullptr; - - if (addr == 0 || addr == LLDB_INVALID_ADDRESS) - return lldb::ChildCacheState::eRefetch; - - auto ts = pair_type.GetTypeSystem(); - auto ast_ctx = ts.dyn_cast_or_null(); - if (!ast_ctx) - return lldb::ChildCacheState::eRefetch; - - // Mimick layout of std::__hash_iterator::__node_ and read it in - // from process memory. 
- // - // The following shows the contiguous block of memory: - // - // +-----------------------------+ class __hash_node_base - // __node_ | __next_pointer __next_; | - // +-----------------------------+ class __hash_node - // | size_t __hash_; | - // | __node_value_type __value_; | <<< our key/value pair - // +-----------------------------+ - // - CompilerType tree_node_type = ast_ctx->CreateStructForIdentifier( - llvm::StringRef(), - {{"__next_", - ast_ctx->GetBasicType(lldb::eBasicTypeVoid).GetPointerType()}, - {"__hash_", ast_ctx->GetBasicType(lldb::eBasicTypeUnsignedLongLong)}, - {"__value_", pair_type}}); - std::optional size = tree_node_type.GetByteSize(nullptr); - if (!size) - return lldb::ChildCacheState::eRefetch; - WritableDataBufferSP buffer_sp(new DataBufferHeap(*size, 0)); - ProcessSP process_sp(target_sp->GetProcessSP()); - Status error; - process_sp->ReadMemory(addr, buffer_sp->GetBytes(), - buffer_sp->GetByteSize(), error); - if (error.Fail()) - return lldb::ChildCacheState::eRefetch; - DataExtractor extractor(buffer_sp, process_sp->GetByteOrder(), - process_sp->GetAddressByteSize()); - auto pair_sp = CreateValueObjectFromData( - "pair", extractor, valobj_sp->GetExecutionContextRef(), tree_node_type); - if (pair_sp) - m_pair_sp = pair_sp->GetChildAtIndex(2); - } - - return lldb::ChildCacheState::eRefetch; -} - -llvm::Expected lldb_private::formatters:: - LibCxxUnorderedMapIteratorSyntheticFrontEnd::CalculateNumChildren() { - return 2; -} - -lldb::ValueObjectSP lldb_private::formatters:: - LibCxxUnorderedMapIteratorSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { - if (m_pair_sp) - return m_pair_sp->GetChildAtIndex(idx); - return lldb::ValueObjectSP(); -} - -bool lldb_private::formatters::LibCxxUnorderedMapIteratorSyntheticFrontEnd:: - MightHaveChildren() { - return true; -} - -size_t lldb_private::formatters::LibCxxUnorderedMapIteratorSyntheticFrontEnd:: - GetIndexOfChildWithName(ConstString name) { - if (name == "first") - return 0; - if (name 
== "second") - return 1; - return UINT32_MAX; -} - -SyntheticChildrenFrontEnd * -lldb_private::formatters::LibCxxUnorderedMapIteratorSyntheticFrontEndCreator( - CXXSyntheticChildren *, lldb::ValueObjectSP valobj_sp) { - return (valobj_sp ? new LibCxxUnorderedMapIteratorSyntheticFrontEnd(valobj_sp) - : nullptr); -} - /* (lldb) fr var ibeg --raw --ptr-depth 1 -T (std::__1::__wrap_iter) ibeg = { diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h index 21dba015d1ba19..5307b5251ca843 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.h @@ -87,56 +87,6 @@ bool LibcxxContainerSummaryProvider(ValueObject &valobj, Stream &stream, bool LibcxxSpanSummaryProvider(ValueObject &valobj, Stream &stream, const TypeSummaryOptions &options); -/// Formats libcxx's std::unordered_map iterators -/// -/// In raw form a std::unordered_map::iterator is represented as follows: -/// -/// (lldb) var it --raw --ptr-depth 1 -/// (std::__1::__hash_map_iterator< -/// std::__1::__hash_iterator< -/// std::__1::__hash_node< -/// std::__1::__hash_value_type< -/// std::__1::basic_string, -/// std::__1::allocator >, std::__1::basic_string, std::__1::allocator > >, -/// void *> *> >) -/// it = { -/// __i_ = { -/// __node_ = 0x0000600001700040 { -/// __next_ = 0x0000600001704000 -/// } -/// } -/// } -class LibCxxUnorderedMapIteratorSyntheticFrontEnd - : public SyntheticChildrenFrontEnd { -public: - LibCxxUnorderedMapIteratorSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); - - ~LibCxxUnorderedMapIteratorSyntheticFrontEnd() override = default; - - llvm::Expected CalculateNumChildren() override; - - lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; - - lldb::ChildCacheState Update() override; - - bool MightHaveChildren() override; - - size_t GetIndexOfChildWithName(ConstString name) override; - -private: - ValueObject *m_iter_ptr = nullptr; ///< Held, not owned. 
Child of iterator - ///< ValueObject supplied at construction. - - lldb::ValueObjectSP m_pair_sp; ///< ValueObject for the key/value pair - ///< that the iterator currently points - ///< to. -}; - -SyntheticChildrenFrontEnd * -LibCxxUnorderedMapIteratorSyntheticFrontEndCreator(CXXSyntheticChildren *, - lldb::ValueObjectSP); - SyntheticChildrenFrontEnd * LibCxxVectorIteratorSyntheticFrontEndCreator(CXXSyntheticChildren *, lldb::ValueObjectSP); @@ -230,6 +180,10 @@ SyntheticChildrenFrontEnd * LibcxxStdUnorderedMapSyntheticFrontEndCreator(CXXSyntheticChildren *, lldb::ValueObjectSP); +SyntheticChildrenFrontEnd * +LibCxxUnorderedMapIteratorSyntheticFrontEndCreator(CXXSyntheticChildren *, + lldb::ValueObjectSP); + SyntheticChildrenFrontEnd * LibcxxInitializerListSyntheticFrontEndCreator(CXXSyntheticChildren *, lldb::ValueObjectSP); diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp index c2bb3555908bee..5106a63d531f88 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp @@ -17,12 +17,33 @@ #include "lldb/Utility/Endian.h" #include "lldb/Utility/Status.h" #include "lldb/Utility/Stream.h" +#include "lldb/lldb-enumerations.h" #include "lldb/lldb-forward.h" +#include +#include +#include using namespace lldb; using namespace lldb_private; using namespace lldb_private::formatters; +// The flattened layout of the std::__tree_iterator::__ptr_ looks +// as follows: +// +// The following shows the contiguous block of memory: +// +// +-----------------------------+ class __tree_end_node +// __ptr_ | pointer __left_; | +// +-----------------------------+ class __tree_node_base +// | pointer __right_; | +// | __parent_pointer __parent_; | +// | bool __is_black_; | +// +-----------------------------+ class __tree_node +// | __node_value_type __value_; | <<< our key/value pair +// +-----------------------------+ +// +// where __ptr_ has type 
__iter_pointer. + class MapEntry { public: MapEntry() = default; @@ -181,10 +202,6 @@ class LibcxxStdMapSyntheticFrontEnd : public SyntheticChildrenFrontEnd { size_t GetIndexOfChildWithName(ConstString name) override; private: - bool GetDataType(); - - void GetValueOffset(const lldb::ValueObjectSP &node); - /// Returns the ValueObject for the __tree_node type that /// holds the key/value pair of the node at index \ref idx. /// @@ -203,8 +220,7 @@ class LibcxxStdMapSyntheticFrontEnd : public SyntheticChildrenFrontEnd { ValueObject *m_tree = nullptr; ValueObject *m_root_node = nullptr; - CompilerType m_element_type; - uint32_t m_skip_size = UINT32_MAX; + CompilerType m_node_ptr_type; size_t m_count = UINT32_MAX; std::map m_iterators; }; @@ -223,18 +239,17 @@ class LibCxxMapIteratorSyntheticFrontEnd : public SyntheticChildrenFrontEnd { size_t GetIndexOfChildWithName(ConstString name) override; - ~LibCxxMapIteratorSyntheticFrontEnd() override; + ~LibCxxMapIteratorSyntheticFrontEnd() override = default; private: - ValueObject *m_pair_ptr; - lldb::ValueObjectSP m_pair_sp; + ValueObjectSP m_pair_sp = nullptr; }; } // namespace formatters } // namespace lldb_private lldb_private::formatters::LibcxxStdMapSyntheticFrontEnd:: LibcxxStdMapSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp) - : SyntheticChildrenFrontEnd(*valobj_sp), m_element_type(), m_iterators() { + : SyntheticChildrenFrontEnd(*valobj_sp) { if (valobj_sp) Update(); } @@ -260,146 +275,44 @@ llvm::Expected lldb_private::formatters:: return m_count; } -bool lldb_private::formatters::LibcxxStdMapSyntheticFrontEnd::GetDataType() { - if (m_element_type.IsValid()) - return true; - m_element_type.Clear(); - ValueObjectSP deref; - Status error; - deref = m_root_node->Dereference(error); - if (!deref || error.Fail()) - return false; - deref = deref->GetChildMemberWithName("__value_"); - if (deref) { - m_element_type = deref->GetCompilerType(); - return true; - } - deref = m_backend.GetChildAtNamePath({"__tree_", 
"__pair3_"}); - if (!deref) - return false; - m_element_type = deref->GetCompilerType() - .GetTypeTemplateArgument(1) - .GetTypeTemplateArgument(1); - if (m_element_type) { - std::string name; - uint64_t bit_offset_ptr; - uint32_t bitfield_bit_size_ptr; - bool is_bitfield_ptr; - m_element_type = m_element_type.GetFieldAtIndex( - 0, name, &bit_offset_ptr, &bitfield_bit_size_ptr, &is_bitfield_ptr); - m_element_type = m_element_type.GetTypedefedType(); - return m_element_type.IsValid(); - } else { - m_element_type = m_backend.GetCompilerType().GetTypeTemplateArgument(0); - return m_element_type.IsValid(); - } -} - -void lldb_private::formatters::LibcxxStdMapSyntheticFrontEnd::GetValueOffset( - const lldb::ValueObjectSP &node) { - if (m_skip_size != UINT32_MAX) - return; - if (!node) - return; - CompilerType node_type(node->GetCompilerType()); - uint64_t bit_offset; - if (node_type.GetIndexOfFieldWithName("__value_", nullptr, &bit_offset) != - UINT32_MAX) { - // Old layout (pre d05b10ab4fc65) - m_skip_size = bit_offset / 8u; - } else { - auto ast_ctx = node_type.GetTypeSystem().dyn_cast_or_null(); - if (!ast_ctx) - return; - CompilerType tree_node_type = ast_ctx->CreateStructForIdentifier( - llvm::StringRef(), - {{"ptr0", ast_ctx->GetBasicType(lldb::eBasicTypeVoid).GetPointerType()}, - {"ptr1", ast_ctx->GetBasicType(lldb::eBasicTypeVoid).GetPointerType()}, - {"ptr2", ast_ctx->GetBasicType(lldb::eBasicTypeVoid).GetPointerType()}, - {"cw", ast_ctx->GetBasicType(lldb::eBasicTypeBool)}, - {"payload", (m_element_type.GetCompleteType(), m_element_type)}}); - std::string child_name; - uint32_t child_byte_size; - int32_t child_byte_offset = 0; - uint32_t child_bitfield_bit_size; - uint32_t child_bitfield_bit_offset; - bool child_is_base_class; - bool child_is_deref_of_parent; - uint64_t language_flags; - auto child_type = - llvm::expectedToStdOptional(tree_node_type.GetChildCompilerTypeAtIndex( - nullptr, 4, true, true, true, child_name, child_byte_size, - child_byte_offset, 
child_bitfield_bit_size, - child_bitfield_bit_offset, child_is_base_class, - child_is_deref_of_parent, nullptr, language_flags)); - if (child_type && child_type->IsValid()) - m_skip_size = (uint32_t)child_byte_offset; - } -} - ValueObjectSP lldb_private::formatters::LibcxxStdMapSyntheticFrontEnd::GetKeyValuePair( size_t idx, size_t max_depth) { MapIterator iterator(m_root_node, max_depth); - const bool need_to_skip = (idx > 0); - size_t actual_advance = idx; - if (need_to_skip) { + size_t advance_by = idx; + if (idx > 0) { // If we have already created the iterator for the previous // index, we can start from there and advance by 1. auto cached_iterator = m_iterators.find(idx - 1); if (cached_iterator != m_iterators.end()) { iterator = cached_iterator->second; - actual_advance = 1; + advance_by = 1; } } - ValueObjectSP iterated_sp(iterator.advance(actual_advance)); + ValueObjectSP iterated_sp(iterator.advance(advance_by)); if (!iterated_sp) // this tree is garbage - stop return nullptr; - if (!GetDataType()) + if (!m_node_ptr_type.IsValid()) return nullptr; - if (!need_to_skip) { - Status error; - iterated_sp = iterated_sp->Dereference(error); - if (!iterated_sp || error.Fail()) - return nullptr; - - GetValueOffset(iterated_sp); - auto child_sp = iterated_sp->GetChildMemberWithName("__value_"); - if (child_sp) { - // Old layout (pre 089a7cc5dea) - iterated_sp = child_sp; - } else { - iterated_sp = iterated_sp->GetSyntheticChildAtOffset( - m_skip_size, m_element_type, true); - } + // iterated_sp is a __iter_pointer at this point. + // We can cast it to a __node_pointer (which is what libc++ does). 
+ auto value_type_sp = iterated_sp->Cast(m_node_ptr_type); + if (!value_type_sp) + return nullptr; - if (!iterated_sp) - return nullptr; - } else { - // because of the way our debug info is made, we need to read item 0 - // first so that we can cache information used to generate other elements - if (m_skip_size == UINT32_MAX) - GetChildAtIndex(0); - - if (m_skip_size == UINT32_MAX) - return nullptr; - - iterated_sp = iterated_sp->GetSyntheticChildAtOffset(m_skip_size, - m_element_type, true); - if (!iterated_sp) - return nullptr; - } + // Finally, get the key/value pair. + value_type_sp = value_type_sp->GetChildMemberWithName("__value_"); + if (!value_type_sp) + return nullptr; m_iterators[idx] = iterator; - assert(iterated_sp != nullptr && - "Cached MapIterator for invalid ValueObject"); - return iterated_sp; + return value_type_sp; } lldb::ValueObjectSP @@ -459,6 +372,9 @@ lldb_private::formatters::LibcxxStdMapSyntheticFrontEnd::Update() { if (!m_tree) return lldb::ChildCacheState::eRefetch; m_root_node = m_tree->GetChildMemberWithName("__begin_node_").get(); + m_node_ptr_type = + m_tree->GetCompilerType().GetDirectNestedTypeWithName("__node_pointer"); + return lldb::ChildCacheState::eRefetch; } @@ -480,7 +396,7 @@ lldb_private::formatters::LibcxxStdMapSyntheticFrontEndCreator( lldb_private::formatters::LibCxxMapIteratorSyntheticFrontEnd:: LibCxxMapIteratorSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp) - : SyntheticChildrenFrontEnd(*valobj_sp), m_pair_ptr(), m_pair_sp() { + : SyntheticChildrenFrontEnd(*valobj_sp) { if (valobj_sp) Update(); } @@ -488,117 +404,63 @@ lldb_private::formatters::LibCxxMapIteratorSyntheticFrontEnd:: lldb::ChildCacheState lldb_private::formatters::LibCxxMapIteratorSyntheticFrontEnd::Update() { m_pair_sp.reset(); - m_pair_ptr = nullptr; ValueObjectSP valobj_sp = m_backend.GetSP(); if (!valobj_sp) return lldb::ChildCacheState::eRefetch; TargetSP target_sp(valobj_sp->GetTargetSP()); - if (!target_sp) return 
lldb::ChildCacheState::eRefetch; - // this must be a ValueObject* because it is a child of the ValueObject we - // are producing children for it if were a ValueObjectSP, we would end up - // with a loop (iterator -> synthetic -> child -> parent == iterator) and - // that would in turn leak memory by never allowing the ValueObjects to die - // and free their memory - m_pair_ptr = valobj_sp - ->GetValueForExpressionPath( - ".__i_.__ptr_->__value_", nullptr, nullptr, - ValueObject::GetValueForExpressionPathOptions() - .DontCheckDotVsArrowSyntax() - .SetSyntheticChildrenTraversal( - ValueObject::GetValueForExpressionPathOptions:: - SyntheticChildrenTraversal::None), - nullptr) - .get(); - - if (!m_pair_ptr) { - m_pair_ptr = valobj_sp - ->GetValueForExpressionPath( - ".__i_.__ptr_", nullptr, nullptr, - ValueObject::GetValueForExpressionPathOptions() - .DontCheckDotVsArrowSyntax() - .SetSyntheticChildrenTraversal( - ValueObject::GetValueForExpressionPathOptions:: - SyntheticChildrenTraversal::None), - nullptr) - .get(); - if (m_pair_ptr) { - auto __i_(valobj_sp->GetChildMemberWithName("__i_")); - if (!__i_) { - m_pair_ptr = nullptr; - return lldb::ChildCacheState::eRefetch; - } - CompilerType pair_type( - __i_->GetCompilerType().GetTypeTemplateArgument(0)); - std::string name; - uint64_t bit_offset_ptr; - uint32_t bitfield_bit_size_ptr; - bool is_bitfield_ptr; - pair_type = pair_type.GetFieldAtIndex( - 0, name, &bit_offset_ptr, &bitfield_bit_size_ptr, &is_bitfield_ptr); - if (!pair_type) { - m_pair_ptr = nullptr; - return lldb::ChildCacheState::eRefetch; - } + // m_backend is a std::map::iterator + // ...which is a __map_iterator<__tree_iterator<..., __node_pointer, ...>> + // + // Then, __map_iterator::__i_ is a __tree_iterator + auto tree_iter_sp = valobj_sp->GetChildMemberWithName("__i_"); + if (!tree_iter_sp) + return lldb::ChildCacheState::eRefetch; - auto addr(m_pair_ptr->GetValueAsUnsigned(LLDB_INVALID_ADDRESS)); - m_pair_ptr = nullptr; - if (addr && addr != 
LLDB_INVALID_ADDRESS) { - auto ts = pair_type.GetTypeSystem(); - auto ast_ctx = ts.dyn_cast_or_null(); - if (!ast_ctx) - return lldb::ChildCacheState::eRefetch; - - // Mimick layout of std::__tree_iterator::__ptr_ and read it in - // from process memory. - // - // The following shows the contiguous block of memory: - // - // +-----------------------------+ class __tree_end_node - // __ptr_ | pointer __left_; | - // +-----------------------------+ class __tree_node_base - // | pointer __right_; | - // | __parent_pointer __parent_; | - // | bool __is_black_; | - // +-----------------------------+ class __tree_node - // | __node_value_type __value_; | <<< our key/value pair - // +-----------------------------+ - // - CompilerType tree_node_type = ast_ctx->CreateStructForIdentifier( - llvm::StringRef(), - {{"ptr0", - ast_ctx->GetBasicType(lldb::eBasicTypeVoid).GetPointerType()}, - {"ptr1", - ast_ctx->GetBasicType(lldb::eBasicTypeVoid).GetPointerType()}, - {"ptr2", - ast_ctx->GetBasicType(lldb::eBasicTypeVoid).GetPointerType()}, - {"cw", ast_ctx->GetBasicType(lldb::eBasicTypeBool)}, - {"payload", pair_type}}); - std::optional size = tree_node_type.GetByteSize(nullptr); - if (!size) - return lldb::ChildCacheState::eRefetch; - WritableDataBufferSP buffer_sp(new DataBufferHeap(*size, 0)); - ProcessSP process_sp(target_sp->GetProcessSP()); - Status error; - process_sp->ReadMemory(addr, buffer_sp->GetBytes(), - buffer_sp->GetByteSize(), error); - if (error.Fail()) - return lldb::ChildCacheState::eRefetch; - DataExtractor extractor(buffer_sp, process_sp->GetByteOrder(), - process_sp->GetAddressByteSize()); - auto pair_sp = CreateValueObjectFromData( - "pair", extractor, valobj_sp->GetExecutionContextRef(), - tree_node_type); - if (pair_sp) - m_pair_sp = pair_sp->GetChildAtIndex(4); - } - } + // Type is __tree_iterator::__node_pointer + // (We could alternatively also get this from the template argument) + auto node_pointer_type = + 
tree_iter_sp->GetCompilerType().GetDirectNestedTypeWithName( + "__node_pointer"); + if (!node_pointer_type.IsValid()) + return lldb::ChildCacheState::eRefetch; + + // __ptr_ is a __tree_iterator::__iter_pointer + auto iter_pointer_sp = tree_iter_sp->GetChildMemberWithName("__ptr_"); + if (!iter_pointer_sp) + return lldb::ChildCacheState::eRefetch; + + // Cast the __iter_pointer to a __node_pointer (which stores our key/value + // pair) + auto node_pointer_sp = iter_pointer_sp->Cast(node_pointer_type); + if (!node_pointer_sp) + return lldb::ChildCacheState::eRefetch; + + auto key_value_sp = node_pointer_sp->GetChildMemberWithName("__value_"); + if (!key_value_sp) + return lldb::ChildCacheState::eRefetch; + + // Create the synthetic child, which is a pair where the key and value can be + // retrieved by querying the synthetic frontend for + // GetIndexOfChildWithName("first") and GetIndexOfChildWithName("second") + // respectively. + // + // std::map stores the actual key/value pair in value_type::__cc_ (or + // previously __cc). 
+ key_value_sp = key_value_sp->Clone(ConstString("pair")); + if (key_value_sp->GetNumChildrenIgnoringErrors() == 1) { + auto child0_sp = key_value_sp->GetChildAtIndex(0); + if (child0_sp && + (child0_sp->GetName() == "__cc_" || child0_sp->GetName() == "__cc")) + key_value_sp = child0_sp->Clone(ConstString("pair")); } + m_pair_sp = key_value_sp; + return lldb::ChildCacheState::eRefetch; } @@ -610,11 +472,10 @@ llvm::Expected lldb_private::formatters:: lldb::ValueObjectSP lldb_private::formatters::LibCxxMapIteratorSyntheticFrontEnd::GetChildAtIndex( uint32_t idx) { - if (m_pair_ptr) - return m_pair_ptr->GetChildAtIndex(idx); - if (m_pair_sp) - return m_pair_sp->GetChildAtIndex(idx); - return lldb::ValueObjectSP(); + if (!m_pair_sp) + return nullptr; + + return m_pair_sp->GetChildAtIndex(idx); } bool lldb_private::formatters::LibCxxMapIteratorSyntheticFrontEnd:: @@ -624,17 +485,10 @@ bool lldb_private::formatters::LibCxxMapIteratorSyntheticFrontEnd:: size_t lldb_private::formatters::LibCxxMapIteratorSyntheticFrontEnd:: GetIndexOfChildWithName(ConstString name) { - if (name == "first") - return 0; - if (name == "second") - return 1; - return UINT32_MAX; -} + if (!m_pair_sp) + return UINT32_MAX; -lldb_private::formatters::LibCxxMapIteratorSyntheticFrontEnd:: - ~LibCxxMapIteratorSyntheticFrontEnd() { - // this will be deleted when its parent dies (since it's a child object) - // delete m_pair_ptr; + return m_pair_sp->GetIndexOfChildWithName(name); } SyntheticChildrenFrontEnd * diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp index af29fdb6d00109..93e7f4f4fd86c1 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp @@ -51,6 +51,30 @@ class LibcxxStdUnorderedMapSyntheticFrontEnd ValueObject *m_next_element = nullptr; std::vector> m_elements_cache; }; + +class 
LibCxxUnorderedMapIteratorSyntheticFrontEnd + : public SyntheticChildrenFrontEnd { +public: + LibCxxUnorderedMapIteratorSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); + + ~LibCxxUnorderedMapIteratorSyntheticFrontEnd() override = default; + + llvm::Expected CalculateNumChildren() override; + + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; + + lldb::ChildCacheState Update() override; + + bool MightHaveChildren() override; + + size_t GetIndexOfChildWithName(ConstString name) override; + +private: + lldb::ValueObjectSP m_pair_sp; ///< ValueObject for the key/value pair + ///< that the iterator currently points + ///< to. +}; + } // namespace formatters } // namespace lldb_private @@ -246,3 +270,119 @@ lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEndCreator( return (valobj_sp ? new LibcxxStdUnorderedMapSyntheticFrontEnd(valobj_sp) : nullptr); } + +lldb_private::formatters::LibCxxUnorderedMapIteratorSyntheticFrontEnd:: + LibCxxUnorderedMapIteratorSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp) + : SyntheticChildrenFrontEnd(*valobj_sp) { + if (valobj_sp) + Update(); +} + +lldb::ChildCacheState lldb_private::formatters:: + LibCxxUnorderedMapIteratorSyntheticFrontEnd::Update() { + m_pair_sp.reset(); + + ValueObjectSP valobj_sp = m_backend.GetSP(); + if (!valobj_sp) + return lldb::ChildCacheState::eRefetch; + + TargetSP target_sp(valobj_sp->GetTargetSP()); + + if (!target_sp) + return lldb::ChildCacheState::eRefetch; + + // Get the unordered_map::iterator + // m_backend is an 'unordered_map::iterator', aka a + // '__hash_map_iterator<__hash_table::iterator>' + // + // __hash_map_iterator::__i_ is a __hash_table::iterator (aka + // __hash_iterator<__node_pointer>) + auto hash_iter_sp = valobj_sp->GetChildMemberWithName("__i_"); + if (!hash_iter_sp) + return lldb::ChildCacheState::eRefetch; + + // Type is '__hash_iterator<__node_pointer>' + auto hash_iter_type = hash_iter_sp->GetCompilerType(); + if (!hash_iter_type.IsValid()) + return 
lldb::ChildCacheState::eRefetch; + + // Type is '__node_pointer' + auto node_pointer_type = hash_iter_type.GetTypeTemplateArgument(0); + if (!node_pointer_type.IsValid()) + return lldb::ChildCacheState::eRefetch; + + // Cast the __hash_iterator to a __node_pointer (which stores our key/value + // pair) + auto hash_node_sp = hash_iter_sp->Cast(node_pointer_type); + if (!hash_node_sp) + return lldb::ChildCacheState::eRefetch; + + auto key_value_sp = hash_node_sp->GetChildMemberWithName("__value_"); + if (!key_value_sp) { + // clang-format off + // Since D101206 (ba79fb2e1f), libc++ wraps the `__value_` in an + // anonymous union. + // Child 0: __hash_node_base base class + // Child 1: __hash_ + // Child 2: anonymous union + // clang-format on + auto anon_union_sp = hash_node_sp->GetChildAtIndex(2); + if (!anon_union_sp) + return lldb::ChildCacheState::eRefetch; + + key_value_sp = anon_union_sp->GetChildMemberWithName("__value_"); + if (!key_value_sp) + return lldb::ChildCacheState::eRefetch; + } + + // Create the synthetic child, which is a pair where the key and value can be + // retrieved by querying the synthetic frontend for + // GetIndexOfChildWithName("first") and GetIndexOfChildWithName("second") + // respectively. + // + // std::unordered_map stores the actual key/value pair in + // __hash_value_type::__cc_ (or previously __cc). 
+ auto potential_child_sp = key_value_sp->Clone(ConstString("pair")); + if (potential_child_sp) + if (potential_child_sp->GetNumChildrenIgnoringErrors() == 1) + if (auto child0_sp = potential_child_sp->GetChildAtIndex(0); + child0_sp->GetName() == "__cc_" || child0_sp->GetName() == "__cc") + potential_child_sp = child0_sp->Clone(ConstString("pair")); + + m_pair_sp = potential_child_sp; + + return lldb::ChildCacheState::eRefetch; +} + +llvm::Expected lldb_private::formatters:: + LibCxxUnorderedMapIteratorSyntheticFrontEnd::CalculateNumChildren() { + return 2; +} + +lldb::ValueObjectSP lldb_private::formatters:: + LibCxxUnorderedMapIteratorSyntheticFrontEnd::GetChildAtIndex(uint32_t idx) { + if (m_pair_sp) + return m_pair_sp->GetChildAtIndex(idx); + return lldb::ValueObjectSP(); +} + +bool lldb_private::formatters::LibCxxUnorderedMapIteratorSyntheticFrontEnd:: + MightHaveChildren() { + return true; +} + +size_t lldb_private::formatters::LibCxxUnorderedMapIteratorSyntheticFrontEnd:: + GetIndexOfChildWithName(ConstString name) { + if (name == "first") + return 0; + if (name == "second") + return 1; + return UINT32_MAX; +} + +SyntheticChildrenFrontEnd * +lldb_private::formatters::LibCxxUnorderedMapIteratorSyntheticFrontEndCreator( + CXXSyntheticChildren *, lldb::ValueObjectSP valobj_sp) { + return (valobj_sp ? 
new LibCxxUnorderedMapIteratorSyntheticFrontEnd(valobj_sp) + : nullptr); +} diff --git a/lldb/source/Plugins/Process/Windows/Common/TargetThreadWindows.cpp b/lldb/source/Plugins/Process/Windows/Common/TargetThreadWindows.cpp index dc7697f71d6a6f..a69c10081ff190 100644 --- a/lldb/source/Plugins/Process/Windows/Common/TargetThreadWindows.cpp +++ b/lldb/source/Plugins/Process/Windows/Common/TargetThreadWindows.cpp @@ -175,11 +175,6 @@ Status TargetThreadWindows::DoResume() { return Status(); } -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wcast-function-type-mismatch" -#endif - const char *TargetThreadWindows::GetName() { Log *log = GetLog(LLDBLog::Thread); static GetThreadDescriptionFunctionPtr GetThreadDescription = []() { @@ -205,7 +200,3 @@ const char *TargetThreadWindows::GetName() { return m_name.c_str(); } - -#if defined(__clang__) -#pragma clang diagnostic pop -#endif diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp index d890ad92e83122..2608a9c5fb79a2 100644 --- a/lldb/source/Symbol/ObjectFile.cpp +++ b/lldb/source/Symbol/ObjectFile.cpp @@ -184,6 +184,15 @@ ObjectFileSP ObjectFile::FindPlugin(const lldb::ModuleSP &module_sp, return object_file_sp; } +bool ObjectFile::IsObjectFile(lldb_private::FileSpec file_spec) { + DataBufferSP data_sp; + offset_t data_offset = 0; + ModuleSP module_sp = std::make_shared(file_spec); + return static_cast(ObjectFile::FindPlugin( + module_sp, &file_spec, 0, FileSystem::Instance().GetByteSize(file_spec), + data_sp, data_offset)); +} + size_t ObjectFile::GetModuleSpecifications(const FileSpec &file, lldb::offset_t file_offset, lldb::offset_t file_size, diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp index c06abd3070f317..bb90c377d86b2d 100644 --- a/lldb/source/Target/Platform.cpp +++ b/lldb/source/Target/Platform.cpp @@ -732,7 +732,6 @@ Status Platform::ResolveExecutable(const ModuleSpec &module_spec, lldb::ModuleSP 
&exe_module_sp, const FileSpecList *module_search_paths_ptr) { - Status error; // We may connect to a process and use the provided executable (Don't use // local $PATH). @@ -741,55 +740,57 @@ Platform::ResolveExecutable(const ModuleSpec &module_spec, // Resolve any executable within a bundle on MacOSX Host::ResolveExecutableInBundle(resolved_module_spec.GetFileSpec()); - if (FileSystem::Instance().Exists(resolved_module_spec.GetFileSpec()) || - module_spec.GetUUID().IsValid()) { - if (resolved_module_spec.GetArchitecture().IsValid() || - resolved_module_spec.GetUUID().IsValid()) { - error = ModuleList::GetSharedModule(resolved_module_spec, exe_module_sp, - module_search_paths_ptr, nullptr, - nullptr); + if (!FileSystem::Instance().Exists(resolved_module_spec.GetFileSpec()) && + !module_spec.GetUUID().IsValid()) + return Status::createWithFormat("'{0}' does not exist", + resolved_module_spec.GetFileSpec()); + + if (resolved_module_spec.GetArchitecture().IsValid() || + resolved_module_spec.GetUUID().IsValid()) { + Status error = + ModuleList::GetSharedModule(resolved_module_spec, exe_module_sp, + module_search_paths_ptr, nullptr, nullptr); + if (exe_module_sp && exe_module_sp->GetObjectFile()) + return error; + exe_module_sp.reset(); + } + // No valid architecture was specified or the exact arch wasn't found. + // Ask the platform for the architectures that we should be using (in the + // correct order) and see if we can find a match that way. 
+ StreamString arch_names; + llvm::ListSeparator LS; + ArchSpec process_host_arch; + Status error; + for (const ArchSpec &arch : GetSupportedArchitectures(process_host_arch)) { + resolved_module_spec.GetArchitecture() = arch; + error = + ModuleList::GetSharedModule(resolved_module_spec, exe_module_sp, + module_search_paths_ptr, nullptr, nullptr); + if (error.Success()) { if (exe_module_sp && exe_module_sp->GetObjectFile()) - return error; - exe_module_sp.reset(); + break; + error.SetErrorToGenericError(); } - // No valid architecture was specified or the exact arch wasn't found. - // Ask the platform for the architectures that we should be using (in the - // correct order) and see if we can find a match that way. - StreamString arch_names; - llvm::ListSeparator LS; - ArchSpec process_host_arch; - for (const ArchSpec &arch : GetSupportedArchitectures(process_host_arch)) { - resolved_module_spec.GetArchitecture() = arch; - error = ModuleList::GetSharedModule(resolved_module_spec, exe_module_sp, - module_search_paths_ptr, nullptr, - nullptr); - if (error.Success()) { - if (exe_module_sp && exe_module_sp->GetObjectFile()) - break; - error.SetErrorToGenericError(); - } - arch_names << LS << arch.GetArchitectureName(); - } + arch_names << LS << arch.GetArchitectureName(); + } - if (error.Fail() || !exe_module_sp) { - if (FileSystem::Instance().Readable(resolved_module_spec.GetFileSpec())) { - error.SetErrorStringWithFormatv( - "'{0}' doesn't contain any '{1}' platform architectures: {2}", - resolved_module_spec.GetFileSpec(), GetPluginName(), - arch_names.GetData()); - } else { - error.SetErrorStringWithFormatv("'{0}' is not readable", - resolved_module_spec.GetFileSpec()); - } - } - } else { - error.SetErrorStringWithFormatv("'{0}' does not exist", + if (exe_module_sp && error.Success()) + return {}; + + if (!FileSystem::Instance().Readable(resolved_module_spec.GetFileSpec())) + return Status::createWithFormat("'{0}' is not readable", 
resolved_module_spec.GetFileSpec()); - } - return error; + if (!ObjectFile::IsObjectFile(resolved_module_spec.GetFileSpec())) + return Status::createWithFormat("'{0}' is not a valid executable", + resolved_module_spec.GetFileSpec()); + + return Status::createWithFormat( + "'{0}' doesn't contain any '{1}' platform architectures: {2}", + resolved_module_spec.GetFileSpec(), GetPluginName(), + arch_names.GetData()); } Status Platform::ResolveSymbolFile(Target &target, const ModuleSpec &sym_spec, diff --git a/lldb/source/Utility/Listener.cpp b/lldb/source/Utility/Listener.cpp index 0b28cb5cdc6424..1efaad392502c6 100644 --- a/lldb/source/Utility/Listener.cpp +++ b/lldb/source/Utility/Listener.cpp @@ -18,28 +18,21 @@ using namespace lldb; using namespace lldb_private; -Listener::Listener(const char *name) - : m_name(name), m_broadcasters(), m_broadcasters_mutex(), m_events(), - m_events_mutex(), m_is_shadow() { - Log *log = GetLog(LLDBLog::Object); - if (log != nullptr) - LLDB_LOGF(log, "%p Listener::Listener('%s')", static_cast(this), - m_name.c_str()); +Listener::Listener(const char *name) : m_name(name) { + LLDB_LOGF(GetLog(LLDBLog::Object), "%p Listener::Listener('%s')", + static_cast(this), m_name.c_str()); } Listener::~Listener() { - Log *log = GetLog(LLDBLog::Object); - // Don't call Clear() from here as that can cause races. See #96750. 
- LLDB_LOGF(log, "%p Listener::%s('%s')", static_cast(this), - __FUNCTION__, m_name.c_str()); + LLDB_LOGF(GetLog(LLDBLog::Object), "%p Listener::%s('%s')", + static_cast(this), __FUNCTION__, m_name.c_str()); } void Listener::Clear() { Log *log = GetLog(LLDBLog::Object); - std::lock_guard broadcasters_guard( - m_broadcasters_mutex); + std::lock_guard broadcasters_guard(m_broadcasters_mutex); broadcaster_collection::iterator pos, end = m_broadcasters.end(); for (pos = m_broadcasters.begin(); pos != end; ++pos) { Broadcaster::BroadcasterImplSP broadcaster_sp(pos->first.lock()); @@ -68,8 +61,7 @@ uint32_t Listener::StartListeningForEvents(Broadcaster *broadcaster, // Scope for "locker" // Tell the broadcaster to add this object as a listener { - std::lock_guard broadcasters_guard( - m_broadcasters_mutex); + std::lock_guard broadcasters_guard(m_broadcasters_mutex); Broadcaster::BroadcasterImplWP impl_wp(broadcaster->GetBroadcasterImpl()); m_broadcasters.insert( std::make_pair(impl_wp, BroadcasterInfo(event_mask))); @@ -99,8 +91,7 @@ uint32_t Listener::StartListeningForEvents(Broadcaster *broadcaster, // Scope for "locker" // Tell the broadcaster to add this object as a listener { - std::lock_guard broadcasters_guard( - m_broadcasters_mutex); + std::lock_guard broadcasters_guard(m_broadcasters_mutex); Broadcaster::BroadcasterImplWP impl_wp(broadcaster->GetBroadcasterImpl()); m_broadcasters.insert(std::make_pair( impl_wp, BroadcasterInfo(event_mask, callback, callback_user_data))); @@ -131,8 +122,7 @@ bool Listener::StopListeningForEvents(Broadcaster *broadcaster, if (broadcaster) { // Scope for "locker" { - std::lock_guard broadcasters_guard( - m_broadcasters_mutex); + std::lock_guard broadcasters_guard(m_broadcasters_mutex); m_broadcasters.erase(broadcaster->GetBroadcasterImpl()); } // Remove the broadcaster from our set of broadcasters @@ -147,8 +137,7 @@ bool Listener::StopListeningForEvents(Broadcaster *broadcaster, void Listener::BroadcasterWillDestruct(Broadcaster 
*broadcaster) { // Scope for "broadcasters_locker" { - std::lock_guard broadcasters_guard( - m_broadcasters_mutex); + std::lock_guard broadcasters_guard(m_broadcasters_mutex); m_broadcasters.erase(broadcaster->GetBroadcasterImpl()); } @@ -322,7 +311,7 @@ bool Listener::GetEvent(EventSP &event_sp, const Timeout &timeout) { size_t Listener::HandleBroadcastEvent(EventSP &event_sp) { size_t num_handled = 0; - std::lock_guard guard(m_broadcasters_mutex); + std::lock_guard guard(m_broadcasters_mutex); Broadcaster *broadcaster = event_sp->GetBroadcaster(); if (!broadcaster) return 0; @@ -357,7 +346,7 @@ Listener::StartListeningForEventSpec(const BroadcasterManagerSP &manager_sp, // The BroadcasterManager mutex must be locked before m_broadcasters_mutex to // avoid violating the lock hierarchy (manager before broadcasters). std::lock_guard manager_guard(manager_sp->m_manager_mutex); - std::lock_guard guard(m_broadcasters_mutex); + std::lock_guard guard(m_broadcasters_mutex); uint32_t bits_acquired = manager_sp->RegisterListenerForEventsNoLock( this->shared_from_this(), event_spec); @@ -379,7 +368,7 @@ bool Listener::StopListeningForEventSpec(const BroadcasterManagerSP &manager_sp, // The BroadcasterManager mutex must be locked before m_broadcasters_mutex to // avoid violating the lock hierarchy (manager before broadcasters). 
std::lock_guard manager_guard(manager_sp->m_manager_mutex); - std::lock_guard guard(m_broadcasters_mutex); + std::lock_guard guard(m_broadcasters_mutex); return manager_sp->UnregisterListenerForEventsNoLock(this->shared_from_this(), event_spec); } diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/TestDataFormatterLibccIterator.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/TestDataFormatterLibccIterator.py index d9e316b9b8f4ef..dd7ac364fea04b 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/TestDataFormatterLibccIterator.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/TestDataFormatterLibccIterator.py @@ -54,8 +54,34 @@ def cleanup(): self.expect("frame variable iimI", substrs=["first = 43981", "second = 61681"]) self.expect("expr iimI", substrs=["first = 43981", "second = 61681"]) + self.expect("frame variable iimI.first", substrs=["first = 43981"]) + self.expect("frame variable iimI.first", substrs=["second"], matching=False) + self.expect("frame variable iimI.second", substrs=["second = 61681"]) + self.expect("frame variable iimI.second", substrs=["first"], matching=False) + self.expect("frame variable simI", substrs=['first = "world"', "second = 42"]) self.expect("expr simI", substrs=['first = "world"', "second = 42"]) + self.expect("frame variable simI.first", substrs=['first = "world"']) + self.expect("frame variable simI.first", substrs=["second"], matching=False) + self.expect("frame variable simI.second", substrs=["second = 42"]) + self.expect("frame variable simI.second", substrs=["first"], matching=False) + self.expect("frame variable svI", substrs=['item = "hello"']) self.expect("expr svI", substrs=['item = "hello"']) + + self.expect("frame variable iiumI", substrs=["first = 61453", "second = 51966"]) + self.expect("expr iiumI", substrs=["first = 61453", "second = 51966"]) + + 
self.expect("frame variable siumI", substrs=['first = "hello"', "second = 137"]) + self.expect("expr siumI", substrs=['first = "hello"', "second = 137"]) + + self.expect("frame variable iiumI.first", substrs=["first = 61453"]) + self.expect("frame variable iiumI.first", substrs=["second"], matching=False) + self.expect("frame variable iiumI.second", substrs=["second = 51966"]) + self.expect("frame variable iiumI.second", substrs=["first"], matching=False) + + self.expect("frame variable siumI.first", substrs=['first = "hello"']) + self.expect("frame variable siumI.first", substrs=["second"], matching=False) + self.expect("frame variable siumI.second", substrs=["second = 137"]) + self.expect("frame variable siumI.second", substrs=["first"], matching=False) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/main.cpp index 9d1cbfd9128689..e53c0f167c3254 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/main.cpp +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/main.cpp @@ -1,38 +1,50 @@ -#include #include +#include #include typedef std::map intint_map; typedef std::map strint_map; +typedef std::unordered_map intint_umap; +typedef std::unordered_map strint_umap; + typedef std::vector int_vector; typedef std::vector string_vector; -typedef intint_map::iterator iimter; -typedef strint_map::iterator simter; +typedef intint_map::iterator ii_map_iter; +typedef strint_map::iterator si_map_iter; +typedef intint_umap::iterator ii_umap_iter; +typedef strint_umap::iterator si_umap_iter; typedef int_vector::iterator ivter; typedef string_vector::iterator svter; -int main() -{ - intint_map iim; - iim[0xABCD] = 0xF0F1; +int main() { + intint_map iim; + iim[0xABCD] = 0xF0F1; + + strint_map sim; + sim["world"] = 42; + + intint_umap iium; + iium[0xF00D] = 0xCAFE; - strint_map 
sim; - sim["world"] = 42; + strint_umap sium; + sium["hello"] = 137; - int_vector iv; - iv.push_back(3); + int_vector iv; + iv.push_back(3); - string_vector sv; - sv.push_back("hello"); + string_vector sv; + sv.push_back("hello"); - iimter iimI = iim.begin(); - simter simI = sim.begin(); + ii_map_iter iimI = iim.begin(); + si_map_iter simI = sim.begin(); + ii_umap_iter iiumI = iium.begin(); + si_umap_iter siumI = sium.begin(); - ivter ivI = iv.begin(); - svter svI = sv.begin(); + ivter ivI = iv.begin(); + svter svI = sv.begin(); - return 0; // Set break point at this line. + return 0; // Set break point at this line. } diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py index ba1641888b6f30..ea4a53fcb4097a 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/TestDataFormatterLibStdcxxVariant.py @@ -21,15 +21,17 @@ def test_with_run_command(self): lldbutil.continue_to_breakpoint(self.process, bkpt) - self.expect( - "frame variable v1", - substrs=["v1 = Active Type = int {", "Value = 12", "}"], - ) - - self.expect( - "frame variable v1_ref", - substrs=["v1_ref = Active Type = int : {", "Value = 12", "}"], - ) + for name in ["v1", "v1_typedef"]: + self.expect( + "frame variable " + name, + substrs=[name + " = Active Type = int {", "Value = 12", "}"], + ) + + for name in ["v1_ref", "v1_typedef_ref"]: + self.expect( + "frame variable " + name, + substrs=[name + " = Active Type = int : {", "Value = 12", "}"], + ) self.expect( "frame variable v_v1", diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp 
b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp index 545318f9358b67..36e0f74f831f8a 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/variant/main.cpp @@ -14,6 +14,10 @@ int main() { std::variant v1; std::variant &v1_ref = v1; + using V1_typedef = std::variant; + V1_typedef v1_typedef; + V1_typedef &v1_typedef_ref = v1_typedef; + std::variant v2; std::variant v3; std::variant> v_v1; @@ -43,6 +47,7 @@ int main() { v_many_types_no_value; v1 = 12; // v contains int + v1_typedef = v1; v_v1 = v1; int i = std::get(v1); printf("%d\n", i); // break here diff --git a/lldb/test/API/functionalities/unwind/zeroth_frame/TestZerothFrame.py b/lldb/test/API/functionalities/unwind/zeroth_frame/TestZerothFrame.py index f4e883d314644a..d660844405e137 100644 --- a/lldb/test/API/functionalities/unwind/zeroth_frame/TestZerothFrame.py +++ b/lldb/test/API/functionalities/unwind/zeroth_frame/TestZerothFrame.py @@ -40,28 +40,28 @@ def test(self): target = self.dbg.CreateTarget(exe) self.assertTrue(target, VALID_TARGET) - bp1_line = line_number("main.c", "// Set breakpoint 1 here") - bp2_line = line_number("main.c", "// Set breakpoint 2 here") - - lldbutil.run_break_set_by_file_and_line( - self, "main.c", bp1_line, num_expected_locations=1 + main_dot_c = lldb.SBFileSpec("main.c") + bp1 = target.BreakpointCreateBySourceRegex( + "// Set breakpoint 1 here", main_dot_c ) - lldbutil.run_break_set_by_file_and_line( - self, "main.c", bp2_line, num_expected_locations=1 + bp2 = target.BreakpointCreateBySourceRegex( + "// Set breakpoint 2 here", main_dot_c ) process = target.LaunchSimple(None, None, self.get_process_working_directory()) self.assertTrue(process, VALID_PROCESS) - thread = process.GetThreadAtIndex(0) + thread = self.thread() + if self.TraceOn(): print("Backtrace at the first breakpoint:") for f in 
thread.frames: print(f) + # Check that we have stopped at correct breakpoint. self.assertEqual( - process.GetThreadAtIndex(0).frame[0].GetLineEntry().GetLine(), - bp1_line, + thread.frame[0].GetLineEntry().GetLine(), + bp1.GetLocationAtIndex(0).GetAddress().GetLineEntry().GetLine(), "LLDB reported incorrect line number.", ) @@ -70,7 +70,6 @@ def test(self): # 'continue' command. process.Continue() - thread = process.GetThreadAtIndex(0) if self.TraceOn(): print("Backtrace at the second breakpoint:") for f in thread.frames: @@ -78,7 +77,7 @@ def test(self): # Check that we have stopped at the breakpoint self.assertEqual( thread.frame[0].GetLineEntry().GetLine(), - bp2_line, + bp2.GetLocationAtIndex(0).GetAddress().GetLineEntry().GetLine(), "LLDB reported incorrect line number.", ) # Double-check with GetPCAddress() diff --git a/lldb/test/Shell/ObjectFile/PECOFF/invalid-export-table.yaml b/lldb/test/Shell/ObjectFile/PECOFF/invalid-export-table.yaml index 389261ad9b10a5..e6564661b96fd2 100644 --- a/lldb/test/Shell/ObjectFile/PECOFF/invalid-export-table.yaml +++ b/lldb/test/Shell/ObjectFile/PECOFF/invalid-export-table.yaml @@ -4,7 +4,7 @@ # RUN: yaml2obj %s -o %t.exe # RUN: %lldb %t.exe 2>&1 | FileCheck %s -# CHECK: error: '{{.*}}' doesn't contain any {{.*}} platform architectures +# CHECK: error: '{{.*}}' is not a valid executable --- !COFF OptionalHeader: AddressOfEntryPoint: 4096 diff --git a/llvm/docs/CommandGuide/llvm-objcopy.rst b/llvm/docs/CommandGuide/llvm-objcopy.rst index a62acfc8fdcd80..eaeb19e26c7b67 100644 --- a/llvm/docs/CommandGuide/llvm-objcopy.rst +++ b/llvm/docs/CommandGuide/llvm-objcopy.rst @@ -299,6 +299,10 @@ them. Allow :program:`llvm-objcopy` to remove sections even if it would leave invalid section references. Any invalid sh_link fields will be set to zero. +.. option:: --change-section-lma \*{+-} + + Shift LMA of non-zero-sized segments by ````. + .. option:: --change-start , --adjust-start Add ```` to the program's start address. 
Can be specified multiple diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index a6bfd551193989..ac2711a8c0ef89 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -189,6 +189,11 @@ Changes to the RISC-V Backend * B (the collection of the Zba, Zbb, Zbs extensions) is supported. * Added smcdeleg, ssccfg, smcsrind, and sscsrind extensions to -march. * ``-mcpu=syntacore-scr3-rv32`` and ``-mcpu=syntacore-scr3-rv64`` were added. +* The default atomics mapping was changed to emit an additional trailing fence + for sequentially consistent stores, offering compatibility with a future + mapping using load-acquire and store-release instructions while remaining + fully compatible with objects produced prior to this change. The mapping + (ABI) used is recorded as an ELF attribute. Changes to the WebAssembly Backend ---------------------------------- diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h index 91e1872e9bd6ff..eab7fec6b6e08f 100644 --- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -345,7 +345,7 @@ class BranchProbabilityInfo { /// Helper to construct LoopBlock for \p BB. 
LoopBlock getLoopBlock(const BasicBlock *BB) const { - return LoopBlock(BB, *LI, *SccI.get()); + return LoopBlock(BB, *LI, *SccI); } /// Returns true if destination block belongs to some loop and source block is diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index c74e76604e786d..f6bb044392938e 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -863,7 +863,7 @@ class LoopAccessInfoManager { const LoopAccessInfo &getInfo(Loop &L); - void clear() { LoopAccessInfoMap.clear(); } + void clear(); bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv); diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h index 0333f457c1a2d8..7d726a246ca3cb 100644 --- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h +++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h @@ -45,7 +45,7 @@ class MLInlineAdvisor : public InlineAdvisor { bool isForcedToStop() const { return ForceStop; } int64_t getLocalCalls(Function &F); - const MLModelRunner &getModelRunner() const { return *ModelRunner.get(); } + const MLModelRunner &getModelRunner() const { return *ModelRunner; } FunctionPropertiesInfo &getCachedFPI(Function &) const; protected: diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h index 5a2425257b03fa..ac828021dd2ae1 100644 --- a/llvm/include/llvm/Analysis/MemorySSA.h +++ b/llvm/include/llvm/Analysis/MemorySSA.h @@ -937,7 +937,7 @@ class MemorySSAAnalysis : public AnalysisInfoMixin { struct Result { Result(std::unique_ptr &&MSSA) : MSSA(std::move(MSSA)) {} - MemorySSA &getMSSA() { return *MSSA.get(); } + MemorySSA &getMSSA() { return *MSSA; } std::unique_ptr MSSA; diff --git a/llvm/include/llvm/Analysis/ValueLattice.h b/llvm/include/llvm/Analysis/ValueLattice.h index 2898cdd3d7b0ca..b81eb5f60ab7ed 100644 --- 
a/llvm/include/llvm/Analysis/ValueLattice.h +++ b/llvm/include/llvm/Analysis/ValueLattice.h @@ -281,6 +281,18 @@ class ValueLatticeElement { return std::nullopt; } + ConstantRange asConstantRange(Type *Ty, bool UndefAllowed = false) const { + assert(Ty->isIntOrIntVectorTy() && "Must be integer type"); + if (isConstantRange(UndefAllowed)) + return getConstantRange(); + if (isConstant()) + return getConstant()->toConstantRange(); + unsigned BW = Ty->getScalarSizeInBits(); + if (isUnknown()) + return ConstantRange::getEmpty(BW); + return ConstantRange::getFull(BW); + } + bool markOverdefined() { if (isOverdefined()) return false; diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 32cc9ff8cbb787..456cffff6b4a7c 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -22,6 +22,7 @@ #include "llvm/ADT/StringRef.h" #include #include +#include namespace llvm { namespace ELF { @@ -1430,6 +1431,14 @@ struct Elf64_Rela { } }; +// In-memory representation of CREL. The serialized representation uses LEB128. +template struct Elf_Crel { + std::conditional_t r_offset; + uint32_t r_symidx; + uint32_t r_type; + std::conditional_t r_addend; +}; + // Relocation entry without explicit addend or info (relative relocations only). typedef Elf64_Xword Elf64_Relr; // offset/bitmap for relative relocations diff --git a/llvm/include/llvm/CodeGenData/OutlinedHashTree.h b/llvm/include/llvm/CodeGenData/OutlinedHashTree.h new file mode 100644 index 00000000000000..2c8a9288f8a8c7 --- /dev/null +++ b/llvm/include/llvm/CodeGenData/OutlinedHashTree.h @@ -0,0 +1,99 @@ +//===- OutlinedHashTree.h --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// This defines the OutlinedHashTree class. It contains sequences of stable +// hash values of instructions that have been outlined. This OutlinedHashTree +// can be used to track the outlined instruction sequences across modules. +// +//===---------------------------------------------------------------------===// + +#ifndef LLVM_CODEGENDATA_OUTLINEDHASHTREE_H +#define LLVM_CODEGENDATA_OUTLINEDHASHTREE_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StableHashing.h" +#include "llvm/ObjectYAML/YAML.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +namespace llvm { + +/// A HashNode is an entry in an OutlinedHashTree, holding a hash value +/// and a collection of Successors (other HashNodes). If a HashNode has +/// a positive terminal value (Terminals > 0), it signifies the end of +/// a hash sequence with that occurrence count. +struct HashNode { + /// The hash value of the node. + stable_hash Hash = 0; + /// The number of terminals in the sequence ending at this node. + std::optional Terminals; + /// The successors of this node. + /// We don't use DenseMap as a stable_hash value can be tombstone. + std::unordered_map> Successors; +}; + +class OutlinedHashTree { + + using EdgeCallbackFn = + std::function; + using NodeCallbackFn = std::function; + + using HashSequence = SmallVector; + using HashSequencePair = std::pair; + +public: + /// Walks every edge and node in the OutlinedHashTree and calls CallbackEdge + /// for the edges and CallbackNode for the nodes with the stable_hash for + /// the source and the stable_hash of the sink for an edge. These generic + /// callbacks can be used to traverse a OutlinedHashTree for the purpose of + /// print debugging or serializing it. 
+ void walkGraph(NodeCallbackFn CallbackNode, + EdgeCallbackFn CallbackEdge = nullptr, + bool SortedWalk = false) const; + + /// Release all hash nodes except the root hash node. + void clear() { + assert(getRoot()->Hash == 0 && !getRoot()->Terminals); + getRoot()->Successors.clear(); + } + + /// \returns true if the hash tree has only the root node. + bool empty() { return size() == 1; } + + /// \returns the size of a OutlinedHashTree by traversing it. If + /// \p GetTerminalCountOnly is true, it only counts the terminal nodes + /// (meaning it returns the the number of hash sequences in the + /// OutlinedHashTree). + size_t size(bool GetTerminalCountOnly = false) const; + + /// \returns the depth of a OutlinedHashTree by traversing it. + size_t depth() const; + + /// \returns the root hash node of a OutlinedHashTree. + const HashNode *getRoot() const { return &Root; } + HashNode *getRoot() { return &Root; } + + /// Inserts a \p Sequence into the this tree. The last node in the sequence + /// will increase Terminals. + void insert(const HashSequencePair &SequencePair); + + /// Merge a \p OtherTree into this Tree. + void merge(const OutlinedHashTree *OtherTree); + + /// \returns the matching count if \p Sequence exists in the OutlinedHashTree. + std::optional find(const HashSequence &Sequence) const; + +private: + HashNode Root; +}; + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/CodeGenData/OutlinedHashTreeRecord.h b/llvm/include/llvm/CodeGenData/OutlinedHashTreeRecord.h new file mode 100644 index 00000000000000..de397c9ca5e70d --- /dev/null +++ b/llvm/include/llvm/CodeGenData/OutlinedHashTreeRecord.h @@ -0,0 +1,75 @@ +//===- OutlinedHashTreeRecord.h --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// This defines the OutlinedHashTreeRecord class. This class holds the outlined +// hash tree for both serialization and deserialization processes. It utilizes +// two data formats for serialization: raw binary data and YAML. +// These two formats can be used interchangeably. +// +//===---------------------------------------------------------------------===// + +#ifndef LLVM_CODEGENDATA_OUTLINEDHASHTREERECORD_H +#define LLVM_CODEGENDATA_OUTLINEDHASHTREERECORD_H + +#include "llvm/CodeGenData/OutlinedHashTree.h" + +namespace llvm { + +/// HashNodeStable is the serialized, stable, and compact representation +/// of a HashNode. +struct HashNodeStable { + llvm::yaml::Hex64 Hash; + unsigned Terminals; + std::vector SuccessorIds; +}; + +using IdHashNodeStableMapTy = std::map; +using IdHashNodeMapTy = DenseMap; +using HashNodeIdMapTy = DenseMap; + +struct OutlinedHashTreeRecord { + std::unique_ptr HashTree; + + OutlinedHashTreeRecord() { HashTree = std::make_unique(); } + OutlinedHashTreeRecord(std::unique_ptr HashTree) + : HashTree(std::move(HashTree)) {}; + + /// Serialize the outlined hash tree to a raw_ostream. + void serialize(raw_ostream &OS) const; + /// Deserialize the outlined hash tree from a raw_ostream. + void deserialize(const unsigned char *&Ptr); + /// Serialize the outlined hash tree to a YAML stream. + void serializeYAML(yaml::Output &YOS) const; + /// Deserialize the outlined hash tree from a YAML stream. + void deserializeYAML(yaml::Input &YIS); + + /// Merge the other outlined hash tree into this one. + void merge(const OutlinedHashTreeRecord &Other) { + HashTree->merge(Other.HashTree.get()); + } + + /// \returns true if the outlined hash tree is empty. + bool empty() const { return HashTree->empty(); } + + /// Print the outlined hash tree in a YAML format. 
+ void print(raw_ostream &OS = llvm::errs()) const { + yaml::Output YOS(OS); + serializeYAML(YOS); + } + +private: + /// Convert the outlined hash tree to stable data. + void convertToStableData(IdHashNodeStableMapTy &IdNodeStableMap) const; + + /// Convert the stable data back to the outlined hash tree. + void convertFromStableData(const IdHashNodeStableMapTy &IdNodeStableMap); +}; + +} // end namespace llvm + +#endif // LLVM_CODEGENDATA_OUTLINEDHASHTREERECORD_H diff --git a/llvm/include/llvm/IR/DbgVariableFragmentInfo.h b/llvm/include/llvm/IR/DbgVariableFragmentInfo.h new file mode 100644 index 00000000000000..40326d5792f9f9 --- /dev/null +++ b/llvm/include/llvm/IR/DbgVariableFragmentInfo.h @@ -0,0 +1,45 @@ +//===- llvm/IR/DbgVariableFragmentInfo.h ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Helper struct to describe a fragment of a debug variable. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_IR_DBGVARIABLEFRAGMENTINFO_H +#define LLVM_IR_DBGVARIABLEFRAGMENTINFO_H + +#include + +namespace llvm { +struct DbgVariableFragmentInfo { + DbgVariableFragmentInfo() = default; + DbgVariableFragmentInfo(uint64_t SizeInBits, uint64_t OffsetInBits) + : SizeInBits(SizeInBits), OffsetInBits(OffsetInBits) {} + uint64_t SizeInBits; + uint64_t OffsetInBits; + /// Return the index of the first bit of the fragment. + uint64_t startInBits() const { return OffsetInBits; } + /// Return the index of the bit after the end of the fragment, e.g. for + /// fragment offset=16 and size=32 return their sum, 48. + uint64_t endInBits() const { return OffsetInBits + SizeInBits; } + + /// Returns a zero-sized fragment if A and B don't intersect. 
+ static DbgVariableFragmentInfo intersect(DbgVariableFragmentInfo A, + DbgVariableFragmentInfo B) { + // Don't use std::max or min to avoid including . + uint64_t StartInBits = + A.OffsetInBits > B.OffsetInBits ? A.OffsetInBits : B.OffsetInBits; + uint64_t EndInBits = + A.endInBits() < B.endInBits() ? A.endInBits() : B.endInBits(); + if (EndInBits <= StartInBits) + return {0, 0}; + return DbgVariableFragmentInfo(EndInBits - StartInBits, StartInBits); + } +}; +} // end namespace llvm + +#endif // LLVM_IR_DBGVARIABLEFRAGMENTINFO_H diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h index 524945862e8d42..a6220035d25c2d 100644 --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -21,6 +21,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DbgVariableFragmentInfo.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/PseudoProbe.h" #include "llvm/Support/Casting.h" @@ -2886,29 +2887,7 @@ class DIExpression : public MDNode { /// Return whether there is exactly one operator and it is a DW_OP_deref; bool isDeref() const; - /// Holds the characteristics of one fragment of a larger variable. - struct FragmentInfo { - FragmentInfo() = default; - FragmentInfo(uint64_t SizeInBits, uint64_t OffsetInBits) - : SizeInBits(SizeInBits), OffsetInBits(OffsetInBits) {} - uint64_t SizeInBits; - uint64_t OffsetInBits; - /// Return the index of the first bit of the fragment. - uint64_t startInBits() const { return OffsetInBits; } - /// Return the index of the bit after the end of the fragment, e.g. for - /// fragment offset=16 and size=32 return their sum, 48. - uint64_t endInBits() const { return OffsetInBits + SizeInBits; } - - /// Returns a zero-sized fragment if A and B don't intersect. 
- static DIExpression::FragmentInfo intersect(DIExpression::FragmentInfo A, - DIExpression::FragmentInfo B) { - uint64_t StartInBits = std::max(A.OffsetInBits, B.OffsetInBits); - uint64_t EndInBits = std::min(A.endInBits(), B.endInBits()); - if (EndInBits <= StartInBits) - return {0, 0}; - return DIExpression::FragmentInfo(EndInBits - StartInBits, StartInBits); - } - }; + using FragmentInfo = DbgVariableFragmentInfo; /// Return the number of bits that have an active value, i.e. those that /// aren't known to be zero/sign (depending on the type of Var) and which @@ -3003,6 +2982,16 @@ class DIExpression : public MDNode { /// return true with an offset of zero. bool extractIfOffset(int64_t &Offset) const; + /// Assuming that the expression operates on an address, extract a constant + /// offset and the successive ops. Return false if the expression contains + /// any incompatible ops (including non-zero DW_OP_LLVM_args - only a single + /// address operand to the expression is permitted). + /// + /// We don't try very hard to interpret the expression because we assume that + /// foldConstantMath has canonicalized the expression. + bool extractLeadingOffset(int64_t &OffsetInBytes, + SmallVectorImpl &RemainingOps) const; + /// Returns true iff this DIExpression contains at least one instance of /// `DW_OP_LLVM_arg, n` for all n in [0, N). 
bool hasAllLocationOps(unsigned N) const; diff --git a/llvm/include/llvm/IR/DebugProgramInstruction.h b/llvm/include/llvm/IR/DebugProgramInstruction.h index ed8081a3cad197..8d7427cc67e2d9 100644 --- a/llvm/include/llvm/IR/DebugProgramInstruction.h +++ b/llvm/include/llvm/IR/DebugProgramInstruction.h @@ -50,6 +50,7 @@ #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator.h" +#include "llvm/IR/DbgVariableFragmentInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/SymbolTableListTraits.h" @@ -460,6 +461,17 @@ class DbgVariableRecord : public DbgRecord, protected DebugValueUser { resetDebugValue(0, NewLocation); } + std::optional getFragment() const; + /// Get the FragmentInfo for the variable if it exists, otherwise return a + /// FragmentInfo that covers the entire variable if the variable size is + /// known, otherwise return a zero-sized fragment. + DbgVariableFragmentInfo getFragmentOrEntireVariable() const { + if (auto Frag = getFragment()) + return *Frag; + if (auto Sz = getFragmentSizeInBits()) + return {*Sz, 0}; + return {0, 0}; + } /// Get the size (in bits) of the variable, or fragment of the variable that /// is described. 
std::optional getFragmentSizeInBits() const; diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 6f3694cf952d47..65e3403fbf1524 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -57,6 +57,7 @@ def int_aarch64_frint64x : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0> ], [ IntrNoMem ]>; + //===----------------------------------------------------------------------===// // HINT @@ -65,6 +66,8 @@ def int_aarch64_hint : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>; def int_aarch64_break : Intrinsic<[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrNoReturn, IntrCold, ImmArg>]>; +def int_aarch64_hlt : Intrinsic<[], [llvm_i32_ty], + [IntrNoMem, IntrHasSideEffects, IntrNoReturn, IntrCold, ImmArg>]>; def int_aarch64_prefetch : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h index 18056c5fdf816a..d0e45ab59a92eb 100644 --- a/llvm/include/llvm/MC/MCDwarf.h +++ b/llvm/include/llvm/MC/MCDwarf.h @@ -483,7 +483,7 @@ class MCGenDwarfLabelEntry { class MCCFIInstruction { public: - enum OpType { + enum OpType : uint8_t { OpSameValue, OpRememberState, OpRestoreState, @@ -500,39 +500,56 @@ class MCCFIInstruction { OpRegister, OpWindowSave, OpNegateRAState, - OpGnuArgsSize + OpGnuArgsSize, + OpLabel, }; private: - OpType Operation; MCSymbol *Label; - unsigned Register; union { - int Offset; - unsigned Register2; - }; - unsigned AddressSpace = ~0u; + struct { + unsigned Register; + int Offset; + } RI; + struct { + unsigned Register; + int Offset; + unsigned AddressSpace; + } RIA; + struct { + unsigned Register; + unsigned Register2; + } RR; + MCSymbol *CfiLabel; + } U; + OpType Operation; SMLoc Loc; std::vector Values; std::string Comment; MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int O, SMLoc Loc, StringRef V = "", StringRef Comment = "") - : 
Operation(Op), Label(L), Register(R), Offset(O), Loc(Loc), - Values(V.begin(), V.end()), Comment(Comment) { + : Label(L), Operation(Op), Loc(Loc), Values(V.begin(), V.end()), + Comment(Comment) { assert(Op != OpRegister && Op != OpLLVMDefAspaceCfa); + U.RI = {R, O}; } - MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R1, unsigned R2, SMLoc Loc) - : Operation(Op), Label(L), Register(R1), Register2(R2), Loc(Loc) { + : Label(L), Operation(Op), Loc(Loc) { assert(Op == OpRegister); + U.RR = {R1, R2}; } - MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int O, unsigned AS, SMLoc Loc) - : Operation(Op), Label(L), Register(R), Offset(O), AddressSpace(AS), - Loc(Loc) { + : Label(L), Operation(Op), Loc(Loc) { assert(Op == OpLLVMDefAspaceCfa); + U.RIA = {R, O, AS}; + } + + MCCFIInstruction(OpType Op, MCSymbol *L, MCSymbol *CfiLabel, SMLoc Loc) + : Label(L), Operation(Op), Loc(Loc) { + assert(Op == OpLabel); + U.CfiLabel = CfiLabel; } public: @@ -655,34 +672,48 @@ class MCCFIInstruction { return MCCFIInstruction(OpGnuArgsSize, L, 0, Size, Loc); } + static MCCFIInstruction createLabel(MCSymbol *L, MCSymbol *CfiLabel, + SMLoc Loc) { + return MCCFIInstruction(OpLabel, L, CfiLabel, Loc); + } + OpType getOperation() const { return Operation; } MCSymbol *getLabel() const { return Label; } unsigned getRegister() const { + if (Operation == OpRegister) + return U.RR.Register; + if (Operation == OpLLVMDefAspaceCfa) + return U.RIA.Register; assert(Operation == OpDefCfa || Operation == OpOffset || Operation == OpRestore || Operation == OpUndefined || Operation == OpSameValue || Operation == OpDefCfaRegister || - Operation == OpRelOffset || Operation == OpRegister || - Operation == OpLLVMDefAspaceCfa); - return Register; + Operation == OpRelOffset); + return U.RI.Register; } unsigned getRegister2() const { assert(Operation == OpRegister); - return Register2; + return U.RR.Register2; } unsigned getAddressSpace() const { assert(Operation == OpLLVMDefAspaceCfa); - return AddressSpace; + 
return U.RIA.AddressSpace; } int getOffset() const { + if (Operation == OpLLVMDefAspaceCfa) + return U.RIA.Offset; assert(Operation == OpDefCfa || Operation == OpOffset || Operation == OpRelOffset || Operation == OpDefCfaOffset || - Operation == OpAdjustCfaOffset || Operation == OpGnuArgsSize || - Operation == OpLLVMDefAspaceCfa); - return Offset; + Operation == OpAdjustCfaOffset || Operation == OpGnuArgsSize); + return U.RI.Offset; + } + + MCSymbol *getCfiLabel() const { + assert(Operation == OpLabel); + return U.CfiLabel; } StringRef getValues() const { diff --git a/llvm/include/llvm/MC/MCELFExtras.h b/llvm/include/llvm/MC/MCELFExtras.h new file mode 100644 index 00000000000000..2c0718886a11bb --- /dev/null +++ b/llvm/include/llvm/MC/MCELFExtras.h @@ -0,0 +1,63 @@ +//===- MCELFExtras.h - Extra functions for ELF ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCELFEXTRAS_H +#define LLVM_MC_MCELFEXTRAS_H + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/bit.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +namespace llvm::ELF { +// Encode relocations as CREL to OS. ToCrel is responsible for converting a +// const RelocsTy & to an Elf_Crel. 
+template +void encodeCrel(raw_ostream &OS, RelocsTy Relocs, F ToCrel) { + using uint = std::conditional_t; + uint OffsetMask = 8, Offset = 0, Addend = 0; + uint32_t SymIdx = 0, Type = 0; + for (const auto &R : Relocs) + OffsetMask |= ToCrel(R).r_offset; + const int Shift = llvm::countr_zero(OffsetMask); + encodeULEB128(Relocs.size() * 8 + ELF::CREL_HDR_ADDEND + Shift, OS); + for (const auto &R : Relocs) { + auto CR = ToCrel(R); + auto DeltaOffset = static_cast((CR.r_offset - Offset) >> Shift); + Offset = CR.r_offset; + uint8_t B = (DeltaOffset << 3) + (SymIdx != CR.r_symidx) + + (Type != CR.r_type ? 2 : 0) + + (Addend != uint(CR.r_addend) ? 4 : 0); + if (DeltaOffset < 0x10) { + OS << char(B); + } else { + OS << char(B | 0x80); + encodeULEB128(DeltaOffset >> 4, OS); + } + // Delta symidx/type/addend members (SLEB128). + if (B & 1) { + encodeSLEB128(static_cast(CR.r_symidx - SymIdx), OS); + SymIdx = CR.r_symidx; + } + if (B & 2) { + encodeSLEB128(static_cast(CR.r_type - Type), OS); + Type = CR.r_type; + } + if (B & 4) { + encodeSLEB128(std::make_signed_t(CR.r_addend - Addend), OS); + Addend = CR.r_addend; + } + } +} +} // namespace llvm::ELF + +#endif diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index 1d3057a140d8a1..78aa12062102c2 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -1019,6 +1019,7 @@ class MCStreamer { SMLoc Loc = {}); virtual void emitCFIWindowSave(SMLoc Loc = {}); virtual void emitCFINegateRAState(SMLoc Loc = {}); + virtual void emitCFILabelDirective(SMLoc Loc, StringRef Name); virtual void emitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc = SMLoc()); virtual void emitWinCFIEndProc(SMLoc Loc = SMLoc()); diff --git a/llvm/include/llvm/ObjCopy/CommonConfig.h b/llvm/include/llvm/ObjCopy/CommonConfig.h index ae08d4032736e2..7f9d90d528b3ee 100644 --- a/llvm/include/llvm/ObjCopy/CommonConfig.h +++ b/llvm/include/llvm/ObjCopy/CommonConfig.h @@ -245,6 +245,9 @@ struct 
CommonConfig { // Symbol info specified by --add-symbol option. SmallVector SymbolsToAdd; + // Integer options + int64_t ChangeSectionLMAValAll = 0; + // Boolean options bool DeterministicArchives = true; bool ExtractDWO = false; diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h index 0986379ce76f5e..6bf42de89e1c4f 100644 --- a/llvm/include/llvm/Object/ELF.h +++ b/llvm/include/llvm/Object/ELF.h @@ -21,11 +21,13 @@ #include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/ELFTypes.h" #include "llvm/Object/Error.h" +#include "llvm/Support/DataExtractor.h" #include "llvm/Support/Error.h" #include #include #include #include +#include #include namespace llvm { @@ -207,6 +209,47 @@ bool isSectionInSegment(const typename ELFT::Phdr &Phdr, checkSectionVMA(Phdr, Sec); } +// HdrHandler is called once with the number of relocations and whether the +// relocations have addends. EntryHandler is called once per decoded relocation. +template +Error decodeCrel( + ArrayRef Content, + function_ref + HdrHandler, + function_ref)> EntryHandler) { + DataExtractor Data(Content, true, 8); // endian and address size are unused + DataExtractor::Cursor Cur(0); + const uint64_t Hdr = Data.getULEB128(Cur); + size_t Count = Hdr / 8; + const size_t FlagBits = Hdr & ELF::CREL_HDR_ADDEND ? 3 : 2; + const size_t Shift = Hdr % ELF::CREL_HDR_ADDEND; + using uint = typename Elf_Crel_Impl::uint; + uint Offset = 0, Addend = 0; + HdrHandler(Count, Hdr & ELF::CREL_HDR_ADDEND); + uint32_t SymIdx = 0, Type = 0; + for (; Count; --Count) { + // The delta offset and flags member may be larger than uint64_t. Special + // case the first byte (2 or 3 flag bits; the rest are offset bits). Other + // ULEB128 bytes encode the remaining delta offset bits. + const uint8_t B = Data.getU8(Cur); + Offset += B >> FlagBits; + if (B >= 0x80) + Offset += (Data.getULEB128(Cur) << (7 - FlagBits)) - (0x80 >> FlagBits); + // Delta symidx/type/addend members (SLEB128). 
+ if (B & 1) + SymIdx += Data.getSLEB128(Cur); + if (B & 2) + Type += Data.getSLEB128(Cur); + if (B & 4 & Hdr) + Addend += Data.getSLEB128(Cur); + if (!Cur) + break; + EntryHandler( + {Offset << Shift, SymIdx, Type, std::make_signed_t(Addend)}); + } + return Cur.takeError(); +} + template class ELFFile { public: diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index 8cc09e7fd7d551..811943dcd70887 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -29,6 +29,7 @@ #include "llvm/Support/ELFAttributes.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/MemoryBufferRef.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/TargetParser/SubtargetFeature.h" @@ -122,6 +123,8 @@ class ELFObjectFileBase : public ObjectFile { Expected> readBBAddrMap(std::optional TextSectionIndex = std::nullopt, std::vector *PGOAnalyses = nullptr) const; + + StringRef getCrelDecodeProblem(SectionRef Sec) const; }; class ELFSectionRef : public SectionRef { @@ -292,6 +295,10 @@ template class ELFObjectFile : public ELFObjectFileBase { const Elf_Shdr *DotSymtabSec = nullptr; // Symbol table section. const Elf_Shdr *DotSymtabShndxSec = nullptr; // SHT_SYMTAB_SHNDX section. + // Hold CREL relocations for SectionRef::relocations(). 
+ mutable SmallVector, 0> Crels; + mutable SmallVector CrelDecodeProblems; + Error initContent() override; void moveSymbolNext(DataRefImpl &Symb) const override; @@ -446,6 +453,7 @@ template class ELFObjectFile : public ELFObjectFileBase { const Elf_Rel *getRel(DataRefImpl Rel) const; const Elf_Rela *getRela(DataRefImpl Rela) const; + Elf_Crel getCrel(DataRefImpl Crel) const; Expected getSymbol(DataRefImpl Sym) const { return EF.template getEntry(Sym.d.a, Sym.d.b); @@ -499,6 +507,8 @@ template class ELFObjectFile : public ELFObjectFileBase { bool isRelocatableObject() const override; void createFakeSections() { EF.createFakeSections(); } + + StringRef getCrelDecodeProblem(DataRefImpl Sec) const; }; using ELF32LEObjectFile = ELFObjectFile; @@ -1022,6 +1032,24 @@ ELFObjectFile::section_rel_begin(DataRefImpl Sec) const { uintptr_t SHT = reinterpret_cast((*SectionsOrErr).begin()); RelData.d.a = (Sec.p - SHT) / EF.getHeader().e_shentsize; RelData.d.b = 0; + if (reinterpret_cast(Sec.p)->sh_type == ELF::SHT_CREL) { + if (RelData.d.a + 1 > Crels.size()) + Crels.resize(RelData.d.a + 1); + auto &Crel = Crels[RelData.d.a]; + if (Crel.empty()) { + ArrayRef Content = cantFail(getSectionContents(Sec)); + size_t I = 0; + Error Err = decodeCrel( + Content, [&](uint64_t Count, bool) { Crel.resize(Count); }, + [&](Elf_Crel Crel) { Crels[RelData.d.a][I++] = Crel; }); + if (Err) { + Crel.assign(1, Elf_Crel{0, 0, 0, 0}); + if (RelData.d.a + 1 > CrelDecodeProblems.size()) + CrelDecodeProblems.resize(RelData.d.a + 1); + CrelDecodeProblems[RelData.d.a] = toString(std::move(Err)); + } + } + } return relocation_iterator(RelocationRef(RelData, this)); } @@ -1030,9 +1058,13 @@ relocation_iterator ELFObjectFile::section_rel_end(DataRefImpl Sec) const { const Elf_Shdr *S = reinterpret_cast(Sec.p); relocation_iterator Begin = section_rel_begin(Sec); + DataRefImpl RelData = Begin->getRawDataRefImpl(); + if (S->sh_type == ELF::SHT_CREL) { + RelData.d.b = Crels[RelData.d.a].size(); + return 
relocation_iterator(RelocationRef(RelData, this)); + } if (S->sh_type != ELF::SHT_RELA && S->sh_type != ELF::SHT_REL) return Begin; - DataRefImpl RelData = Begin->getRawDataRefImpl(); const Elf_Shdr *RelSec = getRelSection(RelData); // Error check sh_link here so that getRelocationSymbol can just use it. @@ -1050,7 +1082,7 @@ Expected ELFObjectFile::getRelocatedSection(DataRefImpl Sec) const { const Elf_Shdr *EShdr = getSection(Sec); uintX_t Type = EShdr->sh_type; - if (Type != ELF::SHT_REL && Type != ELF::SHT_RELA) + if (Type != ELF::SHT_REL && Type != ELF::SHT_RELA && Type != ELF::SHT_CREL) return section_end(); Expected SecOrErr = EF.getSection(EShdr->sh_info); @@ -1070,7 +1102,9 @@ symbol_iterator ELFObjectFile::getRelocationSymbol(DataRefImpl Rel) const { uint32_t symbolIdx; const Elf_Shdr *sec = getRelSection(Rel); - if (sec->sh_type == ELF::SHT_REL) + if (sec->sh_type == ELF::SHT_CREL) + symbolIdx = getCrel(Rel).r_symidx; + else if (sec->sh_type == ELF::SHT_REL) symbolIdx = getRel(Rel)->getSymbol(EF.isMips64EL()); else symbolIdx = getRela(Rel)->getSymbol(EF.isMips64EL()); @@ -1087,6 +1121,8 @@ ELFObjectFile::getRelocationSymbol(DataRefImpl Rel) const { template uint64_t ELFObjectFile::getRelocationOffset(DataRefImpl Rel) const { const Elf_Shdr *sec = getRelSection(Rel); + if (sec->sh_type == ELF::SHT_CREL) + return getCrel(Rel).r_offset; if (sec->sh_type == ELF::SHT_REL) return getRel(Rel)->r_offset; @@ -1096,6 +1132,8 @@ uint64_t ELFObjectFile::getRelocationOffset(DataRefImpl Rel) const { template uint64_t ELFObjectFile::getRelocationType(DataRefImpl Rel) const { const Elf_Shdr *sec = getRelSection(Rel); + if (sec->sh_type == ELF::SHT_CREL) + return getCrel(Rel).r_type; if (sec->sh_type == ELF::SHT_REL) return getRel(Rel)->getType(EF.isMips64EL()); else @@ -1117,9 +1155,11 @@ void ELFObjectFile::getRelocationTypeName( template Expected ELFObjectFile::getRelocationAddend(DataRefImpl Rel) const { - if (getRelSection(Rel)->sh_type != ELF::SHT_RELA) - return 
createError("Section is not SHT_RELA"); - return (int64_t)getRela(Rel)->r_addend; + if (getRelSection(Rel)->sh_type == ELF::SHT_RELA) + return (int64_t)getRela(Rel)->r_addend; + if (getRelSection(Rel)->sh_type == ELF::SHT_CREL) + return (int64_t)getCrel(Rel).r_addend; + return createError("Relocation section does not have addends"); } template @@ -1142,6 +1182,14 @@ ELFObjectFile::getRela(DataRefImpl Rela) const { return *Ret; } +template +typename ELFObjectFile::Elf_Crel +ELFObjectFile::getCrel(DataRefImpl Crel) const { + assert(getRelSection(Crel)->sh_type == ELF::SHT_CREL); + assert(Crel.d.a < Crels.size()); + return Crels[Crel.d.a][Crel.d.b]; +} + template Expected> ELFObjectFile::create(MemoryBufferRef Object, bool InitContent) { @@ -1453,6 +1501,15 @@ template bool ELFObjectFile::isRelocatableObject() const { return EF.getHeader().e_type == ELF::ET_REL; } +template +StringRef ELFObjectFile::getCrelDecodeProblem(DataRefImpl Sec) const { + uintptr_t SHT = reinterpret_cast(cantFail(EF.sections()).begin()); + auto I = (Sec.p - SHT) / EF.getHeader().e_shentsize; + if (I < CrelDecodeProblems.size()) + return CrelDecodeProblems[I]; + return ""; +} + } // end namespace object } // end namespace llvm diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index d7c70064ca429f..f4bdc6525308d2 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -449,7 +449,7 @@ class SampleProfileReader { StringRef RemapFilename = ""); /// Return the profile summary. 
- ProfileSummary &getSummary() const { return *(Summary.get()); } + ProfileSummary &getSummary() const { return *Summary; } MemoryBuffer *getBuffer() const { return Buffer.get(); } diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index a509ebf6a7e1b3..2ff17bd2f7a71d 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -276,9 +276,12 @@ class LoopVectorizationLegality { bool canVectorizeFPMath(bool EnableStrictReductions); /// Return true if we can vectorize this loop while folding its tail by - /// masking, and mark all respective loads/stores for masking. - /// This object's state is only modified iff this function returns true. - bool prepareToFoldTailByMasking(); + /// masking. + bool canFoldTailByMasking() const; + + /// Mark all respective loads/stores for masking. Must only be called when + /// tail-folding is possible. + void prepareToFoldTailByMasking(); /// Returns the primary induction variable. 
PHINode *getPrimaryInduction() { return PrimaryInduction; } diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index 877898f6daeef6..92389f2896b8e5 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -836,19 +836,6 @@ void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange( } } -static ConstantRange toConstantRange(const ValueLatticeElement &Val, - Type *Ty, bool UndefAllowed = false) { - assert(Ty->isIntOrIntVectorTy() && "Must be integer type"); - if (Val.isConstantRange(UndefAllowed)) - return Val.getConstantRange(); - unsigned BW = Ty->getScalarSizeInBits(); - if (Val.isUnknown()) - return ConstantRange::getEmpty(BW); - if (Val.isConstant()) - return Val.getConstant()->toConstantRange(); - return ConstantRange::getFull(BW); -} - std::optional LazyValueInfoImpl::solveBlockValueSelect(SelectInst *SI, BasicBlock *BB) { // Recurse on our inputs if needed @@ -865,8 +852,8 @@ LazyValueInfoImpl::solveBlockValueSelect(SelectInst *SI, BasicBlock *BB) { ValueLatticeElement &FalseVal = *OptFalseVal; if (TrueVal.isConstantRange() || FalseVal.isConstantRange()) { - const ConstantRange &TrueCR = toConstantRange(TrueVal, SI->getType()); - const ConstantRange &FalseCR = toConstantRange(FalseVal, SI->getType()); + const ConstantRange &TrueCR = TrueVal.asConstantRange(SI->getType()); + const ConstantRange &FalseCR = FalseVal.asConstantRange(SI->getType()); Value *LHS = nullptr; Value *RHS = nullptr; SelectPatternResult SPR = matchSelectPattern(SI, LHS, RHS); @@ -941,7 +928,7 @@ LazyValueInfoImpl::getRangeFor(Value *V, Instruction *CxtI, BasicBlock *BB) { std::optional OptVal = getBlockValue(V, BB, CxtI); if (!OptVal) return std::nullopt; - return toConstantRange(*OptVal, V->getType()); + return OptVal->asConstantRange(V->getType()); } std::optional @@ -1119,7 +1106,7 @@ LazyValueInfoImpl::getValueFromSimpleICmpCondition(CmpInst::Predicate Pred, getBlockValue(RHS, CxtI->getParent(), CxtI); 
if (!R) return std::nullopt; - RHSRange = toConstantRange(*R, RHS->getType()); + RHSRange = R->asConstantRange(RHS->getType()); } ConstantRange TrueValues = @@ -1734,7 +1721,7 @@ ConstantRange LazyValueInfo::getConstantRange(Value *V, Instruction *CxtI, BasicBlock *BB = CxtI->getParent(); ValueLatticeElement Result = getOrCreateImpl(BB->getModule()).getValueInBlock(V, BB, CxtI); - return toConstantRange(Result, V->getType(), UndefAllowed); + return Result.asConstantRange(V->getType(), UndefAllowed); } ConstantRange LazyValueInfo::getConstantRangeAtUse(const Use &U, @@ -1742,7 +1729,7 @@ ConstantRange LazyValueInfo::getConstantRangeAtUse(const Use &U, auto *Inst = cast(U.getUser()); ValueLatticeElement Result = getOrCreateImpl(Inst->getModule()).getValueAtUse(U); - return toConstantRange(Result, U->getType(), UndefAllowed); + return Result.asConstantRange(U->getType(), UndefAllowed); } /// Determine whether the specified value is known to be a @@ -1772,7 +1759,7 @@ ConstantRange LazyValueInfo::getConstantRangeOnEdge(Value *V, ValueLatticeElement Result = getOrCreateImpl(M).getValueOnEdge(V, FromBB, ToBB, CxtI); // TODO: Should undef be allowed here? - return toConstantRange(Result, V->getType(), /*UndefAllowed*/ true); + return Result.asConstantRange(V->getType(), /*UndefAllowed*/ true); } static Constant *getPredicateResult(CmpInst::Predicate Pred, Constant *C, diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index f132e455405253..018861a665c4cd 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -3084,6 +3084,22 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) { return *It->second; } +void LoopAccessInfoManager::clear() { + SmallVector ToRemove; + // Collect LoopAccessInfo entries that may keep references to IR outside the + // analyzed loop or SCEVs that may have been modified or invalidated. 
At the + // moment, that is loops requiring memory or SCEV runtime checks, as those cache + // SCEVs, e.g. for pointer expressions. + for (const auto &[L, LAI] : LoopAccessInfoMap) { + if (LAI->getRuntimePointerChecking()->getChecks().empty() && + LAI->getPSE().getPredicate().isAlwaysTrue()) + continue; + ToRemove.push_back(L); + } + + for (Loop *L : ToRemove) + LoopAccessInfoMap.erase(L); +} bool LoopAccessInfoManager::invalidate( Function &F, const PreservedAnalyses &PA, diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 05c96971233710..f56b2b32ff98f5 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1988,8 +1988,8 @@ Error BitcodeReader::parseAttributeBlock() { Attrs.clear(); break; case bitc::PARAMATTR_CODE_ENTRY: // ENTRY: [attrgrp0, attrgrp1, ...] - for (unsigned i = 0, e = Record.size(); i != e; ++i) - Attrs.push_back(MAttributeGroups[Record[i]]); + for (uint64_t Val : Record) + Attrs.push_back(MAttributeGroups[Val]); MAttributes.push_back(AttributeList::get(Context, Attrs)); Attrs.clear(); diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index 9102f3a60cffc2..7d7b224a17d3b8 100644 --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -549,8 +549,8 @@ class MetadataLoader::MetadataLoaderImpl { /// DISubprogram's retainedNodes. 
void upgradeCULocals() { if (NamedMDNode *CUNodes = TheModule.getNamedMetadata("llvm.dbg.cu")) { - for (unsigned I = 0, E = CUNodes->getNumOperands(); I != E; ++I) { - auto *CU = dyn_cast(CUNodes->getOperand(I)); + for (MDNode *N : CUNodes->operands()) { + auto *CU = dyn_cast(N); if (!CU) continue; diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp index 631f31cba97675..9f735f77d29dc8 100644 --- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -620,8 +620,8 @@ void ValueEnumerator::EnumerateNamedMetadata(const Module &M) { } void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) { - for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) - EnumerateMetadata(nullptr, MD->getOperand(i)); + for (const MDNode *N : MD->operands()) + EnumerateMetadata(nullptr, N); } unsigned ValueEnumerator::getMetadataFunctionID(const Function *F) const { @@ -931,10 +931,9 @@ void ValueEnumerator::EnumerateValue(const Value *V) { // itself. This makes it more likely that we can avoid forward references // in the reader. We know that there can be no cycles in the constants // graph that don't go through a global variable. - for (User::const_op_iterator I = C->op_begin(), E = C->op_end(); - I != E; ++I) - if (!isa(*I)) // Don't enumerate BB operand to BlockAddress. - EnumerateValue(*I); + for (const Use &U : C->operands()) + if (!isa(U)) // Don't enumerate BB operand to BlockAddress. + EnumerateValue(U); if (auto *CE = dyn_cast(C)) { if (CE->getOpcode() == Instruction::ShuffleVector) EnumerateValue(CE->getShuffleMaskForBitcode()); @@ -1144,12 +1143,12 @@ void ValueEnumerator::incorporateFunction(const Function &F) { } // Add all of the function-local metadata. 
- for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i) { + for (const LocalAsMetadata *Local : FnLocalMDVector) { // At this point, every local values have been incorporated, we shouldn't // have a metadata operand that references a value that hasn't been seen. - assert(ValueMap.count(FnLocalMDVector[i]->getValue()) && + assert(ValueMap.count(Local->getValue()) && "Missing value for metadata operand"); - EnumerateFunctionLocalMetadata(F, FnLocalMDVector[i]); + EnumerateFunctionLocalMetadata(F, Local); } // DIArgList entries must come after function-local metadata, as it is not // possible to forward-reference them. @@ -1159,8 +1158,8 @@ void ValueEnumerator::incorporateFunction(const Function &F) { void ValueEnumerator::purgeFunction() { /// Remove purged values from the ValueMap. - for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i) - ValueMap.erase(Values[i].first); + for (const auto &V : llvm::drop_begin(Values, NumModuleValues)) + ValueMap.erase(V.first); for (const Metadata *MD : llvm::drop_begin(MDs, NumModuleMDs)) MetadataMap.erase(MD); for (const BasicBlock *BB : BasicBlocks) diff --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt index 08f198679551d1..638c3bd6f90f53 100644 --- a/llvm/lib/CMakeLists.txt +++ b/llvm/lib/CMakeLists.txt @@ -10,6 +10,7 @@ add_subdirectory(InterfaceStub) add_subdirectory(IRPrinter) add_subdirectory(IRReader) add_subdirectory(CodeGen) +add_subdirectory(CodeGenData) add_subdirectory(CodeGenTypes) add_subdirectory(BinaryFormat) add_subdirectory(Bitcode) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 1391893e55a52a..026471286ca080 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2328,8 +2328,10 @@ bool AsmPrinter::doFinalization(Module &M) { // Emit linkage for the function entry point. emitLinkage(&F, FnEntryPointSym); - // Emit linkage for the function descriptor. 
- emitLinkage(&F, Name); + // If a function's address is taken, which means it may be called via a + // function pointer, we need the function descriptor for it. + if (F.hasAddressTaken()) + emitLinkage(&F, Name); } // Emit the remarks section contents. @@ -2993,8 +2995,7 @@ void AsmPrinter::emitModuleIdents(Module &M) { return; if (const NamedMDNode *NMD = M.getNamedMetadata("llvm.ident")) { - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - const MDNode *N = NMD->getOperand(i); + for (const MDNode *N : NMD->operands()) { assert(N->getNumOperands() == 1 && "llvm.ident metadata entry can have only one operand"); const MDString *S = cast(N->getOperand(0)); @@ -3015,8 +3016,7 @@ void AsmPrinter::emitModuleCommandLines(Module &M) { OutStreamer->pushSection(); OutStreamer->switchSection(CommandLine); OutStreamer->emitZeros(1); - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - const MDNode *N = NMD->getOperand(i); + for (const MDNode *N : NMD->operands()) { assert(N->getNumOperands() == 1 && "llvm.commandline metadata entry can have only one operand"); const MDString *S = cast(N->getOperand(0)); diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp index 87b062a16df1d2..1ff01ad34b30e6 100644 --- a/llvm/lib/CodeGen/CFIInstrInserter.cpp +++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp @@ -248,6 +248,7 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { case MCCFIInstruction::OpWindowSave: case MCCFIInstruction::OpNegateRAState: case MCCFIInstruction::OpGnuArgsSize: + case MCCFIInstruction::OpLabel: break; } if (CSRReg || CSROffset) { diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 5a7d8a5c1f9eb6..22d0708f547860 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -8331,7 +8331,8 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) { if (OptimizeNoopCopyExpression(CI, *TLI, *DL)) 
return true; - if ((isa(I) || isa(I) || isa(I)) && + if ((isa(I) || isa(I) || isa(I) || + isa(I)) && TLI->optimizeExtendOrTruncateConversion( I, LI->getLoopFor(I->getParent()), *TTI)) return true; diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index 91fc9d764b3b87..9558247db3c40d 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -859,5 +859,5 @@ GISelKnownBits &GISelKnownBitsAnalysis::get(MachineFunction &MF) { MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6; Info = std::make_unique(MF, MaxDepth); } - return *Info.get(); + return *Info; } diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 7b96f4589f5c43..d348c2b86916f2 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -3968,7 +3968,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { #endif // ifndef NDEBUG // Translate any debug-info attached to the instruction. 
- translateDbgInfo(Inst, *CurBuilder.get()); + translateDbgInfo(Inst, *CurBuilder); if (translate(Inst)) continue; diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 86de1f3be9047f..3f1094e0ac703d 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2461,13 +2461,22 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF; } + unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits(); + + if (MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) { + // An optimization where the result is the CTLZ after the left shift by + // (Difference in widety and current ty), that is, + // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy)) + // Result = ctlz MIBSrc + MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc, + MIRBuilder.buildConstant(WideTy, SizeDiff)); + } + // Perform the operation at the larger size. auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc}); // This is already the correct result for CTPOP and CTTZs - if (MI.getOpcode() == TargetOpcode::G_CTLZ || - MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) { + if (MI.getOpcode() == TargetOpcode::G_CTLZ) { // The correct result is NewOp - (Difference in widety and current ty). 
- unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits(); MIBNewOp = MIRBuilder.buildSub( WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index d036a0285e5719..d6a0dd9ae9b208 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -5083,7 +5083,6 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: case ISD::CTLZ: - case ISD::CTLZ_ZERO_UNDEF: case ISD::CTPOP: { // Zero extend the argument unless its cttz, then use any_extend. if (Node->getOpcode() == ISD::CTTZ || @@ -5106,7 +5105,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is // already the correct result. Tmp1 = DAG.getNode(NewOpc, dl, NVT, Tmp1); - if (NewOpc == ISD::CTLZ || NewOpc == ISD::CTLZ_ZERO_UNDEF) { + if (NewOpc == ISD::CTLZ) { // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, DAG.getConstant(NVT.getSizeInBits() - @@ -5115,6 +5114,25 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); break; } + case ISD::CTLZ_ZERO_UNDEF: { + // We know that the argument is unlikely to be zero, hence we can take a + // different approach as compared to ISD::CTLZ + + // Any Extend the argument + auto AnyExtendedNode = + DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(0)); + + // Tmp1 = Tmp1 << (sizeinbits(NVT) - sizeinbits(Old VT)) + auto ShiftConstant = DAG.getShiftAmountConstant( + NVT.getSizeInBits() - OVT.getSizeInBits(), NVT, dl); + auto LeftShiftResult = + DAG.getNode(ISD::SHL, dl, NVT, AnyExtendedNode, ShiftConstant); + + // Perform the larger operation + auto CTLZResult = DAG.getNode(Node->getOpcode(), dl, NVT, LeftShiftResult); + 
Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, CTLZResult)); + break; + } case ISD::BITREVERSE: case ISD::BSWAP: { unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 38f8f072dccfd6..fed5ebcc3c903e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -655,24 +655,46 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { } } - // Subtract off the extra leading bits in the bigger type. - SDValue ExtractLeadingBits = DAG.getConstant( - NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, NVT); - if (!N->isVPOpcode()) { + unsigned CtlzOpcode = N->getOpcode(); + if (CtlzOpcode == ISD::CTLZ || CtlzOpcode == ISD::VP_CTLZ) { + // Subtract off the extra leading bits in the bigger type. + SDValue ExtractLeadingBits = DAG.getConstant( + NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, NVT); + + if (!N->isVPOpcode()) { + // Zero extend to the promoted type and do the count there. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::SUB, dl, NVT, + DAG.getNode(N->getOpcode(), dl, NVT, Op), + ExtractLeadingBits); + } + SDValue Mask = N->getOperand(1); + SDValue EVL = N->getOperand(2); // Zero extend to the promoted type and do the count there. 
- SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::SUB, dl, NVT, - DAG.getNode(N->getOpcode(), dl, NVT, Op), - ExtractLeadingBits); - } + SDValue Op = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL); + return DAG.getNode(ISD::VP_SUB, dl, NVT, + DAG.getNode(N->getOpcode(), dl, NVT, Op, Mask, EVL), + ExtractLeadingBits, Mask, EVL); + } + if (CtlzOpcode == ISD::CTLZ_ZERO_UNDEF || + CtlzOpcode == ISD::VP_CTLZ_ZERO_UNDEF) { + // Any Extend the argument + SDValue Op = GetPromotedInteger(N->getOperand(0)); + // Op = Op << (sizeinbits(NVT) - sizeinbits(Old VT)) + unsigned SHLAmount = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); + auto ShiftConst = + DAG.getShiftAmountConstant(SHLAmount, Op.getValueType(), dl); + if (!N->isVPOpcode()) { + Op = DAG.getNode(ISD::SHL, dl, NVT, Op, ShiftConst); + return DAG.getNode(CtlzOpcode, dl, NVT, Op); + } - SDValue Mask = N->getOperand(1); - SDValue EVL = N->getOperand(2); - // Zero extend to the promoted type and do the count there. 
- SDValue Op = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL); - return DAG.getNode(ISD::VP_SUB, dl, NVT, - DAG.getNode(N->getOpcode(), dl, NVT, Op, Mask, EVL), - ExtractLeadingBits, Mask, EVL); + SDValue Mask = N->getOperand(1); + SDValue EVL = N->getOperand(2); + Op = DAG.getNode(ISD::VP_SHL, dl, NVT, Op, ShiftConst, Mask, EVL); + return DAG.getNode(CtlzOpcode, dl, NVT, Op, Mask, EVL); + } + llvm_unreachable("Invalid CTLZ Opcode"); } SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) { diff --git a/llvm/lib/CodeGenData/CMakeLists.txt b/llvm/lib/CodeGenData/CMakeLists.txt new file mode 100644 index 00000000000000..3ba90f96cc86d4 --- /dev/null +++ b/llvm/lib/CodeGenData/CMakeLists.txt @@ -0,0 +1,14 @@ +add_llvm_component_library(LLVMCodeGenData + OutlinedHashTree.cpp + OutlinedHashTreeRecord.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/CodeGenData + + DEPENDS + intrinsics_gen + + LINK_COMPONENTS + Core + Support + ) diff --git a/llvm/lib/CodeGenData/OutlinedHashTree.cpp b/llvm/lib/CodeGenData/OutlinedHashTree.cpp new file mode 100644 index 00000000000000..d64098098de62b --- /dev/null +++ b/llvm/lib/CodeGenData/OutlinedHashTree.cpp @@ -0,0 +1,129 @@ +//===-- OutlinedHashTree.cpp ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// An OutlinedHashTree is a Trie that contains sequences of stable hash values +// of instructions that have been outlined. This OutlinedHashTree can be used +// to understand the outlined instruction sequences collected across modules. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGenData/OutlinedHashTree.h" + +#define DEBUG_TYPE "outlined-hash-tree" + +using namespace llvm; + +void OutlinedHashTree::walkGraph(NodeCallbackFn CallbackNode, + EdgeCallbackFn CallbackEdge, + bool SortedWalk) const { + SmallVector Stack; + Stack.emplace_back(getRoot()); + + while (!Stack.empty()) { + const auto *Current = Stack.pop_back_val(); + if (CallbackNode) + CallbackNode(Current); + + auto HandleNext = [&](const HashNode *Next) { + if (CallbackEdge) + CallbackEdge(Current, Next); + Stack.emplace_back(Next); + }; + if (SortedWalk) { + SmallVector> SortedSuccessors; + for (const auto &[Hash, Successor] : Current->Successors) + SortedSuccessors.emplace_back(Hash, Successor.get()); + llvm::sort(SortedSuccessors); + for (const auto &P : SortedSuccessors) + HandleNext(P.second); + } else { + for (const auto &P : Current->Successors) + HandleNext(P.second.get()); + } + } +} + +size_t OutlinedHashTree::size(bool GetTerminalCountOnly) const { + size_t Size = 0; + walkGraph([&Size, GetTerminalCountOnly](const HashNode *N) { + Size += (N && (!GetTerminalCountOnly || N->Terminals)); + }); + return Size; +} + +size_t OutlinedHashTree::depth() const { + size_t Size = 0; + DenseMap DepthMap; + walkGraph([&Size, &DepthMap]( + const HashNode *N) { Size = std::max(Size, DepthMap[N]); }, + [&DepthMap](const HashNode *Src, const HashNode *Dst) { + size_t Depth = DepthMap[Src]; + DepthMap[Dst] = Depth + 1; + }); + return Size; +} + +void OutlinedHashTree::insert(const HashSequencePair &SequencePair) { + auto &[Sequence, Count] = SequencePair; + HashNode *Current = getRoot(); + + for (stable_hash StableHash : Sequence) { + auto I = Current->Successors.find(StableHash); + if (I == Current->Successors.end()) { + std::unique_ptr Next = std::make_unique(); + HashNode *NextPtr = Next.get(); + NextPtr->Hash = StableHash; + Current->Successors.emplace(StableHash, 
std::move(Next)); + Current = NextPtr; + } else + Current = I->second.get(); + } + if (Count) + Current->Terminals = (Current->Terminals ? *Current->Terminals : 0) + Count; +} + +void OutlinedHashTree::merge(const OutlinedHashTree *Tree) { + HashNode *Dst = getRoot(); + const HashNode *Src = Tree->getRoot(); + SmallVector> Stack; + Stack.emplace_back(Dst, Src); + + while (!Stack.empty()) { + auto [DstNode, SrcNode] = Stack.pop_back_val(); + if (!SrcNode) + continue; + if (SrcNode->Terminals) + DstNode->Terminals = + (DstNode->Terminals ? *DstNode->Terminals : 0) + *SrcNode->Terminals; + for (auto &[Hash, NextSrcNode] : SrcNode->Successors) { + HashNode *NextDstNode; + auto I = DstNode->Successors.find(Hash); + if (I == DstNode->Successors.end()) { + auto NextDst = std::make_unique(); + NextDstNode = NextDst.get(); + NextDstNode->Hash = Hash; + DstNode->Successors.emplace(Hash, std::move(NextDst)); + } else + NextDstNode = I->second.get(); + + Stack.emplace_back(NextDstNode, NextSrcNode.get()); + } + } +} + +std::optional +OutlinedHashTree::find(const HashSequence &Sequence) const { + const HashNode *Current = getRoot(); + for (stable_hash StableHash : Sequence) { + const auto I = Current->Successors.find(StableHash); + if (I == Current->Successors.end()) + return 0; + Current = I->second.get(); + } + return Current->Terminals; +} diff --git a/llvm/lib/CodeGenData/OutlinedHashTreeRecord.cpp b/llvm/lib/CodeGenData/OutlinedHashTreeRecord.cpp new file mode 100644 index 00000000000000..996a57fd5e713c --- /dev/null +++ b/llvm/lib/CodeGenData/OutlinedHashTreeRecord.cpp @@ -0,0 +1,168 @@ +//===-- OutlinedHashTreeRecord.cpp ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines the OutlinedHashTreeRecord class. This class holds the outlined +// hash tree for both serialization and deserialization processes. It utilizes +// two data formats for serialization: raw binary data and YAML. +// These two formats can be used interchangeably. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGenData/OutlinedHashTreeRecord.h" +#include "llvm/ObjectYAML/YAML.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/EndianStream.h" + +#define DEBUG_TYPE "outlined-hash-tree" + +using namespace llvm; +using namespace llvm::support; + +namespace llvm { +namespace yaml { + +template <> struct MappingTraits { + static void mapping(IO &io, HashNodeStable &res) { + io.mapRequired("Hash", res.Hash); + io.mapRequired("Terminals", res.Terminals); + io.mapRequired("SuccessorIds", res.SuccessorIds); + } +}; + +template <> struct CustomMappingTraits { + static void inputOne(IO &io, StringRef Key, IdHashNodeStableMapTy &V) { + HashNodeStable NodeStable; + io.mapRequired(Key.str().c_str(), NodeStable); + unsigned Id; + if (Key.getAsInteger(0, Id)) { + io.setError("Id not an integer"); + return; + } + V.insert({Id, NodeStable}); + } + + static void output(IO &io, IdHashNodeStableMapTy &V) { + for (auto Iter = V.begin(); Iter != V.end(); ++Iter) + io.mapRequired(utostr(Iter->first).c_str(), Iter->second); + } +}; + +} // namespace yaml +} // namespace llvm + +void OutlinedHashTreeRecord::serialize(raw_ostream &OS) const { + IdHashNodeStableMapTy IdNodeStableMap; + convertToStableData(IdNodeStableMap); + support::endian::Writer Writer(OS, endianness::little); + Writer.write(IdNodeStableMap.size()); + + for (const auto &[Id, NodeStable] : IdNodeStableMap) { + Writer.write(Id); + Writer.write(NodeStable.Hash); + 
Writer.write(NodeStable.Terminals); + Writer.write(NodeStable.SuccessorIds.size()); + for (auto SuccessorId : NodeStable.SuccessorIds) + Writer.write(SuccessorId); + } +} + +void OutlinedHashTreeRecord::deserialize(const unsigned char *&Ptr) { + IdHashNodeStableMapTy IdNodeStableMap; + auto NumIdNodeStableMap = + endian::readNext(Ptr); + + for (unsigned I = 0; I < NumIdNodeStableMap; ++I) { + auto Id = endian::readNext(Ptr); + HashNodeStable NodeStable; + NodeStable.Hash = + endian::readNext(Ptr); + NodeStable.Terminals = + endian::readNext(Ptr); + auto NumSuccessorIds = + endian::readNext(Ptr); + for (unsigned J = 0; J < NumSuccessorIds; ++J) + NodeStable.SuccessorIds.push_back( + endian::readNext(Ptr)); + + IdNodeStableMap[Id] = std::move(NodeStable); + } + + convertFromStableData(IdNodeStableMap); +} + +void OutlinedHashTreeRecord::serializeYAML(yaml::Output &YOS) const { + IdHashNodeStableMapTy IdNodeStableMap; + convertToStableData(IdNodeStableMap); + + YOS << IdNodeStableMap; +} + +void OutlinedHashTreeRecord::deserializeYAML(yaml::Input &YIS) { + IdHashNodeStableMapTy IdNodeStableMap; + + YIS >> IdNodeStableMap; + YIS.nextDocument(); + + convertFromStableData(IdNodeStableMap); +} + +void OutlinedHashTreeRecord::convertToStableData( + IdHashNodeStableMapTy &IdNodeStableMap) const { + // Build NodeIdMap + HashNodeIdMapTy NodeIdMap; + HashTree->walkGraph( + [&NodeIdMap](const HashNode *Current) { + size_t Index = NodeIdMap.size(); + NodeIdMap[Current] = Index; + assert(Index = NodeIdMap.size() + 1 && + "Expected size of NodeMap to increment by 1"); + }, + /*EdgeCallbackFn=*/nullptr, /*SortedWork=*/true); + + // Convert NodeIdMap to NodeStableMap + for (auto &P : NodeIdMap) { + auto *Node = P.first; + auto Id = P.second; + HashNodeStable NodeStable; + NodeStable.Hash = Node->Hash; + NodeStable.Terminals = Node->Terminals ? 
*Node->Terminals : 0; + for (auto &P : Node->Successors) + NodeStable.SuccessorIds.push_back(NodeIdMap[P.second.get()]); + IdNodeStableMap[Id] = NodeStable; + } + + // Sort the Successors so that they come out in the same order as in the map. + for (auto &P : IdNodeStableMap) + llvm::sort(P.second.SuccessorIds); +} + +void OutlinedHashTreeRecord::convertFromStableData( + const IdHashNodeStableMapTy &IdNodeStableMap) { + IdHashNodeMapTy IdNodeMap; + // Initialize the root node at 0. + IdNodeMap[0] = HashTree->getRoot(); + assert(IdNodeMap[0]->Successors.empty()); + + for (auto &P : IdNodeStableMap) { + auto Id = P.first; + const HashNodeStable &NodeStable = P.second; + assert(IdNodeMap.count(Id)); + HashNode *Curr = IdNodeMap[Id]; + Curr->Hash = NodeStable.Hash; + if (NodeStable.Terminals) + Curr->Terminals = NodeStable.Terminals; + auto &Successors = Curr->Successors; + assert(Successors.empty()); + for (auto SuccessorId : NodeStable.SuccessorIds) { + auto Sucessor = std::make_unique(); + IdNodeMap[SuccessorId] = Sucessor.get(); + auto Hash = IdNodeStableMap.at(SuccessorId).Hash; + Successors[Hash] = std::move(Sucessor); + } + } +} diff --git a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.cpp b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.cpp index 0cb9cd5f9ea31d..6f659eb8576b79 100644 --- a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.cpp +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.cpp @@ -1843,13 +1843,13 @@ bool CompileUnit::resolveDependenciesAndMarkLiveness( bool CompileUnit::updateDependenciesCompleteness() { assert(Dependencies.get()); - return Dependencies.get()->updateDependenciesCompleteness(); + return Dependencies->updateDependenciesCompleteness(); } void CompileUnit::verifyDependencies() { assert(Dependencies.get()); - Dependencies.get()->verifyKeepChain(); + Dependencies->verifyKeepChain(); } ArrayRef dwarf_linker::parallel::getODRAttributes() { diff --git a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp 
b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp index 84fd0806f07050..6d9e3319db7e5e 100644 --- a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp @@ -142,7 +142,7 @@ Error DWARFLinkerImpl::link() { // twice. And then following handling might be removed. for (const std::unique_ptr &OrigCU : Context->InputDWARFFile.Dwarf->compile_units()) { - DWARFDie UnitDie = OrigCU.get()->getUnitDIE(); + DWARFDie UnitDie = OrigCU->getUnitDIE(); if (!Language) { if (std::optional Val = diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 0bf8be9ac55f9d..6599730590de60 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -1021,8 +1021,8 @@ void SlotTracker::processModule() { // Add metadata used by named metadata. for (const NamedMDNode &NMD : TheModule->named_metadata()) { - for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) - CreateMetadataSlot(NMD.getOperand(i)); + for (const MDNode *N : NMD.operands()) + CreateMetadataSlot(N); } for (const Function &F : *TheModule) { diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index 161a30dfb38288..b4f7a9df412813 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -1760,6 +1760,45 @@ bool DIExpression::extractIfOffset(int64_t &Offset) const { return false; } +bool DIExpression::extractLeadingOffset( + int64_t &OffsetInBytes, SmallVectorImpl &RemainingOps) const { + OffsetInBytes = 0; + RemainingOps.clear(); + + auto SingleLocEltsOpt = getSingleLocationExpressionElements(); + if (!SingleLocEltsOpt) + return false; + + auto ExprOpEnd = expr_op_iterator(SingleLocEltsOpt->end()); + auto ExprOpIt = expr_op_iterator(SingleLocEltsOpt->begin()); + while (ExprOpIt != ExprOpEnd) { + uint64_t Op = ExprOpIt->getOp(); + if (Op == dwarf::DW_OP_deref || Op == dwarf::DW_OP_deref_size || + Op == dwarf::DW_OP_deref_type || Op == dwarf::DW_OP_LLVM_fragment || + Op == 
dwarf::DW_OP_LLVM_extract_bits_zext || + Op == dwarf::DW_OP_LLVM_extract_bits_sext) { + break; + } else if (Op == dwarf::DW_OP_plus_uconst) { + OffsetInBytes += ExprOpIt->getArg(0); + } else if (Op == dwarf::DW_OP_constu) { + uint64_t Value = ExprOpIt->getArg(0); + ++ExprOpIt; + if (ExprOpIt->getOp() == dwarf::DW_OP_plus) + OffsetInBytes += Value; + else if (ExprOpIt->getOp() == dwarf::DW_OP_minus) + OffsetInBytes -= Value; + else + return false; + } else { + // Not a const plus/minus operation or deref. + return false; + } + ++ExprOpIt; + } + RemainingOps.append(ExprOpIt.getBase(), ExprOpEnd.getBase()); + return true; +} + bool DIExpression::hasAllLocationOps(unsigned N) const { SmallDenseSet SeenOps; for (auto ExprOp : expr_ops()) diff --git a/llvm/lib/IR/DebugProgramInstruction.cpp b/llvm/lib/IR/DebugProgramInstruction.cpp index 9a4926c81dca27..362d467beeb11b 100644 --- a/llvm/lib/IR/DebugProgramInstruction.cpp +++ b/llvm/lib/IR/DebugProgramInstruction.cpp @@ -371,6 +371,10 @@ bool DbgVariableRecord::isKillLocation() const { any_of(location_ops(), [](Value *V) { return isa(V); }); } +std::optional DbgVariableRecord::getFragment() const { + return getExpression()->getFragmentInfo(); +} + std::optional DbgVariableRecord::getFragmentSizeInBits() const { if (auto Fragment = getExpression()->getFragmentInfo()) return Fragment->SizeInBits; @@ -399,7 +403,7 @@ DbgVariableIntrinsic * DbgVariableRecord::createDebugIntrinsic(Module *M, Instruction *InsertBefore) const { [[maybe_unused]] DICompileUnit *Unit = - getDebugLoc().get()->getScope()->getSubprogram()->getUnit(); + getDebugLoc()->getScope()->getSubprogram()->getUnit(); assert(M && Unit && "Cannot clone from BasicBlock that is not part of a Module or " "DICompileUnit!"); diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp index 16c79442f82fef..c966c53d09baf9 100644 --- a/llvm/lib/IR/Module.cpp +++ b/llvm/lib/IR/Module.cpp @@ -387,8 +387,7 @@ void Module::setModuleFlag(ModFlagBehavior Behavior, StringRef 
Key, Metadata *Val) { NamedMDNode *ModFlags = getOrInsertModuleFlagsMetadata(); // Replace the flag if it already exists. - for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) { - MDNode *Flag = ModFlags->getOperand(I); + for (MDNode *Flag : ModFlags->operands()) { ModFlagBehavior MFB; MDString *K = nullptr; Metadata *V = nullptr; diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index 5cba6eb15b5c99..59d796b419b35a 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFExtras.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" @@ -903,44 +904,14 @@ void ELFWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags, WriteWord(EntrySize); // sh_entsize } -template +template static void encodeCrel(ArrayRef Relocs, raw_ostream &OS) { - uint OffsetMask = 8, Offset = 0, Addend = 0; - uint32_t SymIdx = 0, Type = 0; - // hdr & 4 indicates 3 flag bits in delta offset and flags members. - for (const ELFRelocationEntry &Entry : Relocs) - OffsetMask |= Entry.Offset; - const int Shift = llvm::countr_zero(OffsetMask); - encodeULEB128(Relocs.size() * 8 + ELF::CREL_HDR_ADDEND + Shift, OS); - for (const ELFRelocationEntry &Entry : Relocs) { - // The delta offset and flags member may be larger than uint64_t. Special - // case the first byte (3 flag bits and 4 offset bits). Other ULEB128 bytes - // encode the remaining delta offset bits. - auto DeltaOffset = static_cast((Entry.Offset - Offset) >> Shift); - Offset = Entry.Offset; - uint32_t CurSymIdx = Entry.Symbol ? Entry.Symbol->getIndex() : 0; - uint8_t B = (DeltaOffset << 3) + (SymIdx != CurSymIdx) + - (Type != Entry.Type ? 2 : 0) + (Addend != Entry.Addend ? 
4 : 0); - if (DeltaOffset < 0x10) { - OS << char(B); - } else { - OS << char(B | 0x80); - encodeULEB128(DeltaOffset >> 4, OS); - } - // Delta symidx/type/addend members (SLEB128). - if (B & 1) { - encodeSLEB128(static_cast(CurSymIdx - SymIdx), OS); - SymIdx = CurSymIdx; - } - if (B & 2) { - encodeSLEB128(static_cast(Entry.Type - Type), OS); - Type = Entry.Type; - } - if (B & 4) { - encodeSLEB128(std::make_signed_t(Entry.Addend - Addend), OS); - Addend = Entry.Addend; - } - } + using uint = std::conditional_t; + ELF::encodeCrel(OS, Relocs, [&](const ELFRelocationEntry &R) { + uint32_t SymIdx = R.Symbol ? R.Symbol->getIndex() : 0; + return ELF::Elf_Crel{static_cast(R.Offset), SymIdx, R.Type, + std::make_signed_t(R.Addend)}; + }); } void ELFWriter::writeRelocations(const MCAssembler &Asm, @@ -989,9 +960,9 @@ void ELFWriter::writeRelocations(const MCAssembler &Asm, } } else if (TO && TO->Crel) { if (is64Bit()) - encodeCrel(Relocs, W.OS); + encodeCrel(Relocs, W.OS); else - encodeCrel(Relocs, W.OS); + encodeCrel(Relocs, W.OS); } else { for (const ELFRelocationEntry &Entry : Relocs) { uint32_t Symidx = Entry.Symbol ? 
Entry.Symbol->getIndex() : 0; diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 0050923f3cd9da..45c32f13e759b0 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -356,6 +356,7 @@ class MCAsmStreamer final : public MCStreamer { void emitCFIWindowSave(SMLoc Loc) override; void emitCFINegateRAState(SMLoc Loc) override; void emitCFIReturnColumn(int64_t Register) override; + void emitCFILabelDirective(SMLoc Loc, StringRef Name) override; void emitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) override; void emitWinCFIEndProc(SMLoc Loc) override; @@ -2126,6 +2127,12 @@ void MCAsmStreamer::emitCFIReturnColumn(int64_t Register) { EmitEOL(); } +void MCAsmStreamer::emitCFILabelDirective(SMLoc Loc, StringRef Name) { + MCStreamer::emitCFILabelDirective(Loc, Name); + OS << "\t.cfi_label " << Name; + EmitEOL(); +} + void MCAsmStreamer::emitCFIBKeyFrame() { MCStreamer::emitCFIBKeyFrame(); OS << "\t.cfi_b_key_frame"; diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp index 321a66ee5abc42..efafd555c5c5c8 100644 --- a/llvm/lib/MC/MCDwarf.cpp +++ b/llvm/lib/MC/MCDwarf.cpp @@ -1465,6 +1465,9 @@ void FrameEmitterImpl::emitCFIInstruction(const MCCFIInstruction &Instr) { case MCCFIInstruction::OpEscape: Streamer.emitBytes(Instr.getValues()); return; + case MCCFIInstruction::OpLabel: + Streamer.emitLabel(Instr.getCfiLabel(), Instr.getLoc()); + return; } llvm_unreachable("Unhandled case in switch"); } diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 707edb0481a619..f3caa90eedfb16 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -520,6 +520,7 @@ class AsmParser : public MCAsmParser { DK_CFI_UNDEFINED, DK_CFI_REGISTER, DK_CFI_WINDOW_SAVE, + DK_CFI_LABEL, DK_CFI_B_KEY_FRAME, DK_MACROS_ON, DK_MACROS_OFF, @@ -622,6 +623,7 @@ class AsmParser : public MCAsmParser { bool parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc); bool 
parseDirectiveCFISignalFrame(SMLoc DirectiveLoc); bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc); + bool parseDirectiveCFILabel(SMLoc DirectiveLoc); // macro directives bool parseDirectivePurgeMacro(SMLoc DirectiveLoc); @@ -2224,6 +2226,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, return parseDirectiveCFIRegister(IDLoc); case DK_CFI_WINDOW_SAVE: return parseDirectiveCFIWindowSave(IDLoc); + case DK_CFI_LABEL: + return parseDirectiveCFILabel(IDLoc); case DK_MACROS_ON: case DK_MACROS_OFF: return parseDirectiveMacrosOnOff(IDVal); @@ -4488,6 +4492,19 @@ bool AsmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) { return false; } +/// parseDirectiveCFILabel +/// ::= .cfi_label label +bool AsmParser::parseDirectiveCFILabel(SMLoc Loc) { + StringRef Name; + Loc = Lexer.getLoc(); + if (parseIdentifier(Name)) + return TokError("expected identifier"); + if (parseEOL()) + return true; + getStreamer().emitCFILabelDirective(Loc, Name); + return false; +} + /// parseDirectiveAltmacro /// ::= .altmacro /// ::= .noaltmacro @@ -5560,6 +5577,7 @@ void AsmParser::initializeDirectiveKindMap() { DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED; DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER; DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE; + DirectiveKindMap[".cfi_label"] = DK_CFI_LABEL; DirectiveKindMap[".cfi_b_key_frame"] = DK_CFI_B_KEY_FRAME; DirectiveKindMap[".cfi_mte_tagged_frame"] = DK_CFI_MTE_TAGGED_FRAME; DirectiveKindMap[".macros_on"] = DK_MACROS_ON; diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp index a3f67941c10157..1594bd3231abe8 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -689,6 +689,13 @@ void MCStreamer::emitCFIReturnColumn(int64_t Register) { CurFrame->RAReg = Register; } +void MCStreamer::emitCFILabelDirective(SMLoc Loc, StringRef Name) { + MCSymbol *Label = emitCFILabel(); + MCSymbol *Sym = getContext().getOrCreateSymbol(Name); + if (MCDwarfFrameInfo *F = 
getCurrentDwarfFrameInfo()) + F->Instructions.push_back(MCCFIInstruction::createLabel(Label, Sym, Loc)); +} + WinEH::FrameInfo *MCStreamer::EnsureValidWinFrameInfo(SMLoc Loc) { const MCAsmInfo *MAI = Context.getAsmInfo(); if (!MAI->usesWindowsCFI()) { diff --git a/llvm/lib/ObjCopy/ConfigManager.cpp b/llvm/lib/ObjCopy/ConfigManager.cpp index 6442f1b958fb4d..c542c4e5f07430 100644 --- a/llvm/lib/ObjCopy/ConfigManager.cpp +++ b/llvm/lib/ObjCopy/ConfigManager.cpp @@ -25,7 +25,8 @@ Expected ConfigManager::getCOFFConfig() const { Common.StripNonAlloc || Common.StripSections || Common.Weaken || Common.DecompressDebugSections || Common.DiscardMode == DiscardType::Locals || - !Common.SymbolsToAdd.empty() || Common.GapFill != 0 || Common.PadTo != 0) + !Common.SymbolsToAdd.empty() || Common.GapFill != 0 || + Common.PadTo != 0 || Common.ChangeSectionLMAValAll != 0) return createStringError(llvm::errc::invalid_argument, "option is not supported for COFF"); @@ -46,7 +47,8 @@ Expected ConfigManager::getMachOConfig() const { Common.StripNonAlloc || Common.StripSections || Common.DecompressDebugSections || Common.StripUnneeded || Common.DiscardMode == DiscardType::Locals || - !Common.SymbolsToAdd.empty() || Common.GapFill != 0 || Common.PadTo != 0) + !Common.SymbolsToAdd.empty() || Common.GapFill != 0 || + Common.PadTo != 0 || Common.ChangeSectionLMAValAll != 0) return createStringError(llvm::errc::invalid_argument, "option is not supported for MachO"); @@ -66,7 +68,7 @@ Expected ConfigManager::getWasmConfig() const { !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() || !Common.SetSectionFlags.empty() || !Common.SetSectionType.empty() || !Common.SymbolsToRename.empty() || Common.GapFill != 0 || - Common.PadTo != 0) + Common.PadTo != 0 || Common.ChangeSectionLMAValAll != 0) return createStringError(llvm::errc::invalid_argument, "only flags for section dumping, removal, and " "addition are supported"); @@ -94,7 +96,8 @@ Expected ConfigManager::getXCOFFConfig() 
const { Common.PreserveDates || Common.StripAllGNU || Common.StripDWO || Common.StripDebug || Common.StripNonAlloc || Common.StripSections || Common.Weaken || Common.StripUnneeded || Common.DecompressDebugSections || - Common.GapFill != 0 || Common.PadTo != 0) { + Common.GapFill != 0 || Common.PadTo != 0 || + Common.ChangeSectionLMAValAll != 0) { return createStringError( llvm::errc::invalid_argument, "no flags are supported yet, only basic copying is allowed"); diff --git a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp index f343d1447e0554..60a85f9a49c590 100644 --- a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp +++ b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp @@ -670,6 +670,33 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig, } } + if (Config.ChangeSectionLMAValAll != 0) { + for (Segment &Seg : Obj.segments()) { + if (Seg.FileSize > 0) { + if (Config.ChangeSectionLMAValAll > 0 && + Seg.PAddr > std::numeric_limits::max() - + Config.ChangeSectionLMAValAll) { + return createStringError( + errc::invalid_argument, + "address 0x" + Twine::utohexstr(Seg.PAddr) + + " cannot be increased by 0x" + + Twine::utohexstr(Config.ChangeSectionLMAValAll) + + ". The result would overflow"); + } else if (Config.ChangeSectionLMAValAll < 0 && + Seg.PAddr < std::numeric_limits::min() - + Config.ChangeSectionLMAValAll) { + return createStringError( + errc::invalid_argument, + "address 0x" + Twine::utohexstr(Seg.PAddr) + + " cannot be decreased by 0x" + + Twine::utohexstr(std::abs(Config.ChangeSectionLMAValAll)) + + ". 
The result would underflow"); + } + Seg.PAddr += Config.ChangeSectionLMAValAll; + } + } + } + if (Config.OnlyKeepDebug) for (auto &Sec : Obj.sections()) if (Sec.Flags & SHF_ALLOC && Sec.Type != SHT_NOTE) diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/llvm/lib/ObjCopy/ELF/ELFObject.cpp index 02591e6f987c26..f9c5d2579be693 100644 --- a/llvm/lib/ObjCopy/ELF/ELFObject.cpp +++ b/llvm/lib/ObjCopy/ELF/ELFObject.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/ADT/iterator_range.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCELFExtras.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/Object/ELF.h" #include "llvm/Object/ELFObjectFile.h" @@ -107,12 +108,29 @@ Error ELFSectionSizer::visit(SymbolTableSection &Sec) { return Error::success(); } +template +static SmallVector encodeCrel(ArrayRef Relocations) { + using uint = std::conditional_t; + SmallVector Content; + raw_svector_ostream OS(Content); + ELF::encodeCrel(OS, Relocations, [&](const Relocation &R) { + uint32_t CurSymIdx = R.RelocSymbol ? R.RelocSymbol->Index : 0; + return ELF::Elf_Crel{static_cast(R.Offset), CurSymIdx, R.Type, + std::make_signed_t(R.Addend)}; + }); + return Content; +} + template Error ELFSectionSizer::visit(RelocationSection &Sec) { - Sec.EntrySize = Sec.Type == SHT_REL ? sizeof(Elf_Rel) : sizeof(Elf_Rela); - Sec.Size = Sec.Relocations.size() * Sec.EntrySize; - // Align to the largest field in Elf_Rel(a). - Sec.Align = ELFT::Is64Bits ? sizeof(Elf_Xword) : sizeof(Elf_Word); + if (Sec.Type == SHT_CREL) { + Sec.Size = encodeCrel(Sec.Relocations).size(); + } else { + Sec.EntrySize = Sec.Type == SHT_REL ? sizeof(Elf_Rel) : sizeof(Elf_Rela); + Sec.Size = Sec.Relocations.size() * Sec.EntrySize; + // Align to the largest field in Elf_Rel(a). + Sec.Align = ELFT::Is64Bits ? 
sizeof(Elf_Xword) : sizeof(Elf_Word); + } return Error::success(); } @@ -874,6 +892,8 @@ StringRef RelocationSectionBase::getNamePrefix() const { return ".rel"; case SHT_RELA: return ".rela"; + case SHT_CREL: + return ".crel"; default: llvm_unreachable("not a relocation section"); } @@ -966,12 +986,16 @@ static void writeRel(const RelRange &Relocations, T *Buf, bool IsMips64EL) { template Error ELFSectionWriter::visit(const RelocationSection &Sec) { uint8_t *Buf = reinterpret_cast(Out.getBufferStart()) + Sec.Offset; - if (Sec.Type == SHT_REL) + if (Sec.Type == SHT_CREL) { + auto Content = encodeCrel(Sec.Relocations); + memcpy(Buf, Content.data(), Content.size()); + } else if (Sec.Type == SHT_REL) { writeRel(Sec.Relocations, reinterpret_cast(Buf), Sec.getObject().IsMips64EL); - else + } else { writeRel(Sec.Relocations, reinterpret_cast(Buf), Sec.getObject().IsMips64EL); + } return Error::success(); } @@ -1684,6 +1708,7 @@ Expected ELFBuilder::makeSection(const Elf_Shdr &Shdr) { switch (Shdr.sh_type) { case SHT_REL: case SHT_RELA: + case SHT_CREL: if (Shdr.sh_flags & SHF_ALLOC) { if (Expected> Data = ElfFile.getSectionContents(Shdr)) return Obj.addSection(*Data); @@ -1861,7 +1886,15 @@ template Error ELFBuilder::readSections(bool EnsureSymtab) { const typename ELFFile::Elf_Shdr *Shdr = Sections->begin() + RelSec->Index; - if (RelSec->Type == SHT_REL) { + if (RelSec->Type == SHT_CREL) { + auto RelsOrRelas = ElfFile.crels(*Shdr); + if (!RelsOrRelas) + return RelsOrRelas.takeError(); + if (Error Err = initRelocations(RelSec, RelsOrRelas->first)) + return Err; + if (Error Err = initRelocations(RelSec, RelsOrRelas->second)) + return Err; + } else if (RelSec->Type == SHT_REL) { Expected::Elf_Rel_Range> Rels = ElfFile.rels(*Shdr); if (!Rels) @@ -2206,8 +2239,17 @@ Error Object::removeSections( // Transfer removed sections into the Object RemovedSections container for use // later. 
std::move(Iter, Sections.end(), std::back_inserter(RemovedSections)); - // Now finally get rid of them all together. + // Now get rid of them altogether. Sections.erase(Iter, std::end(Sections)); + + // Finally erase empty SHT_GROUP sections. + llvm::erase_if(Sections, [](const SecPtr &Sec) { + if (auto GroupSec = dyn_cast(Sec.get())) + return GroupSec->getMembersCount() == 0; + + return false; + }); + return Error::success(); } diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.h b/llvm/lib/ObjCopy/ELF/ELFObject.h index 2b1895a30b41ed..2a9f337c3f3230 100644 --- a/llvm/lib/ObjCopy/ELF/ELFObject.h +++ b/llvm/lib/ObjCopy/ELF/ELFObject.h @@ -881,7 +881,8 @@ class RelocationSectionBase : public SectionBase { StringRef getNamePrefix() const; static bool classof(const SectionBase *S) { - return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA; + return is_contained({ELF::SHT_REL, ELF::SHT_RELA, ELF::SHT_CREL}, + S->OriginalType); } }; @@ -925,7 +926,7 @@ class RelocationSection static bool classof(const SectionBase *S) { if (S->OriginalFlags & ELF::SHF_ALLOC) return false; - return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA; + return RelocationSectionBase::classof(S); } }; @@ -963,6 +964,8 @@ class GroupSection : public SectionBase { const DenseMap &FromTo) override; void onRemove() override; + size_t getMembersCount() const { return GroupMembers.size(); } + static bool classof(const SectionBase *S) { return S->OriginalType == ELF::SHT_GROUP; } diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 18a116754f4566..e47a40b8715dd5 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -408,46 +408,31 @@ ELFFile::getCrelHeader(ArrayRef Content) const { template Expected::RelsOrRelas> ELFFile::decodeCrel(ArrayRef Content) const { - DataExtractor Data(Content, isLE(), sizeof(typename ELFT::Addr)); - DataExtractor::Cursor Cur(0); - const uint64_t Hdr = Data.getULEB128(Cur); - const size_t Count = Hdr / 8; - 
const size_t FlagBits = Hdr & ELF::CREL_HDR_ADDEND ? 3 : 2; - const size_t Shift = Hdr % ELF::CREL_HDR_ADDEND; std::vector Rels; std::vector Relas; - if (Hdr & ELF::CREL_HDR_ADDEND) - Relas.resize(Count); - else - Rels.resize(Count); - typename ELFT::uint Offset = 0, Addend = 0; - uint32_t SymIdx = 0, Type = 0; - for (size_t I = 0; I != Count; ++I) { - // The delta offset and flags member may be larger than uint64_t. Special - // case the first byte (2 or 3 flag bits; the rest are offset bits). Other - // ULEB128 bytes encode the remaining delta offset bits. - const uint8_t B = Data.getU8(Cur); - Offset += B >> FlagBits; - if (B >= 0x80) - Offset += (Data.getULEB128(Cur) << (7 - FlagBits)) - (0x80 >> FlagBits); - // Delta symidx/type/addend members (SLEB128). - if (B & 1) - SymIdx += Data.getSLEB128(Cur); - if (B & 2) - Type += Data.getSLEB128(Cur); - if (B & 4 & Hdr) - Addend += Data.getSLEB128(Cur); - if (Hdr & ELF::CREL_HDR_ADDEND) { - Relas[I].r_offset = Offset << Shift; - Relas[I].setSymbolAndType(SymIdx, Type, false); - Relas[I].r_addend = Addend; - } else { - Rels[I].r_offset = Offset << Shift; - Rels[I].setSymbolAndType(SymIdx, Type, false); - } - } - if (!Cur) - return std::move(Cur.takeError()); + size_t I = 0; + bool HasAddend; + Error Err = object::decodeCrel( + Content, + [&](uint64_t Count, bool HasA) { + HasAddend = HasA; + if (HasAddend) + Relas.resize(Count); + else + Rels.resize(Count); + }, + [&](Elf_Crel Crel) { + if (HasAddend) { + Relas[I].r_offset = Crel.r_offset; + Relas[I].setSymbolAndType(Crel.r_symidx, Crel.r_type, false); + Relas[I++].r_addend = Crel.r_addend; + } else { + Rels[I].r_offset = Crel.r_offset; + Rels[I++].setSymbolAndType(Crel.r_symidx, Crel.r_type, false); + } + }); + if (Err) + return std::move(Err); return std::make_pair(std::move(Rels), std::move(Relas)); } diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index cbc55a145e0e6f..53c3de06d118cc 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp 
+++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -1013,3 +1013,14 @@ Expected> ELFObjectFileBase::readBBAddrMap( return readBBAddrMapImpl(cast(this)->getELFFile(), TextSectionIndex, PGOAnalyses); } + +StringRef ELFObjectFileBase::getCrelDecodeProblem(SectionRef Sec) const { + auto Data = Sec.getRawDataRefImpl(); + if (const auto *Obj = dyn_cast(this)) + return Obj->getCrelDecodeProblem(Data); + if (const auto *Obj = dyn_cast(this)) + return Obj->getCrelDecodeProblem(Data); + if (const auto *Obj = dyn_cast(this)) + return Obj->getCrelDecodeProblem(Data); + return cast(this)->getCrelDecodeProblem(Data); +} diff --git a/llvm/lib/ObjectYAML/XCOFFYAML.cpp b/llvm/lib/ObjectYAML/XCOFFYAML.cpp index 575334ee980e83..ca075b8147a724 100644 --- a/llvm/lib/ObjectYAML/XCOFFYAML.cpp +++ b/llvm/lib/ObjectYAML/XCOFFYAML.cpp @@ -327,7 +327,7 @@ void MappingTraits>::mapping( XCOFFYAML::AuxSymbolType AuxType; if (IO.outputting()) - AuxType = AuxSym.get()->Type; + AuxType = AuxSym->Type; IO.mapRequired("Type", AuxType); switch (AuxType) { case XCOFFYAML::AUX_EXCEPT: diff --git a/llvm/lib/Support/DivisionByConstantInfo.cpp b/llvm/lib/Support/DivisionByConstantInfo.cpp index 8150bd83c79f46..b0e503003a6803 100644 --- a/llvm/lib/Support/DivisionByConstantInfo.cpp +++ b/llvm/lib/Support/DivisionByConstantInfo.cpp @@ -79,7 +79,8 @@ UnsignedDivisionByConstantInfo::get(const APInt &D, unsigned LeadingZeros, APInt Delta; struct UnsignedDivisionByConstantInfo Retval; Retval.IsAdd = false; // initialize "add" indicator - APInt AllOnes = APInt::getAllOnes(D.getBitWidth()).lshr(LeadingZeros); + APInt AllOnes = + APInt::getLowBitsSet(D.getBitWidth(), D.getBitWidth() - LeadingZeros); APInt SignedMin = APInt::getSignedMinValue(D.getBitWidth()); APInt SignedMax = APInt::getSignedMaxValue(D.getBitWidth()); diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp index 092028dd2a5b34..9612db7d30f98b 100644 --- a/llvm/lib/Support/TimeProfiler.cpp +++ b/llvm/lib/Support/TimeProfiler.cpp 
@@ -115,7 +115,7 @@ struct llvm::TimeTraceProfiler { void end() { assert(!Stack.empty() && "Must call begin() first"); - end(*Stack.back().get()); + end(*Stack.back()); } void end(TimeTraceProfilerEntry &E) { diff --git a/llvm/lib/Support/Windows/Process.inc b/llvm/lib/Support/Windows/Process.inc index b9110d4e414fff..34d294b232c32b 100644 --- a/llvm/lib/Support/Windows/Process.inc +++ b/llvm/lib/Support/Windows/Process.inc @@ -482,18 +482,9 @@ static RTL_OSVERSIONINFOEXW GetWindowsVer() { HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll"); assert(hMod); -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wcast-function-type-mismatch" -#endif - auto getVer = (RtlGetVersionPtr)::GetProcAddress(hMod, "RtlGetVersion"); assert(getVer); -#if defined(__clang__) -#pragma clang diagnostic pop -#endif - RTL_OSVERSIONINFOEXW info{}; info.dwOSVersionInfoSize = sizeof(info); NTSTATUS r = getVer((PRTL_OSVERSIONINFOW)&info); diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc index d057981c1c84cf..29ebf7c696e04f 100644 --- a/llvm/lib/Support/Windows/Signals.inc +++ b/llvm/lib/Support/Windows/Signals.inc @@ -167,11 +167,6 @@ static bool isDebugHelpInitialized() { return fStackWalk64 && fSymInitialize && fSymSetOptions && fMiniDumpWriteDump; } -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wcast-function-type-mismatch" -#endif - static bool load64BitDebugHelp(void) { HMODULE hLib = ::LoadLibraryExA("Dbghelp.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); @@ -197,10 +192,6 @@ static bool load64BitDebugHelp(void) { return isDebugHelpInitialized(); } -#if defined(__clang__) -#pragma clang diagnostic pop -#endif - using namespace llvm; // Forward declare. 
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index f9acc68ce5c7fd..d5e7af275d792d 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -2283,9 +2283,9 @@ DefInit *VarDefInit::instantiate() { ArrayRef TArgs = Class->getTemplateArgs(); MapResolver R(NewRec); - for (unsigned I = 0, E = TArgs.size(); I != E; ++I) { - R.set(TArgs[I], NewRec->getValue(TArgs[I])->getValue()); - NewRec->removeValue(TArgs[I]); + for (Init *Arg : TArgs) { + R.set(Arg, NewRec->getValue(Arg)->getValue()); + NewRec->removeValue(Arg); } for (auto *Arg : args()) { diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp index f899fdb68f7582..ce9da960545c23 100644 --- a/llvm/lib/TableGen/TGParser.cpp +++ b/llvm/lib/TableGen/TGParser.cpp @@ -4381,8 +4381,7 @@ bool TGParser::CheckTemplateArgValues( SmallVectorImpl &Values, SMLoc Loc, Record *ArgsRec) { ArrayRef TArgs = ArgsRec->getTemplateArgs(); - for (unsigned I = 0, E = Values.size(); I < E; ++I) { - auto *Value = Values[I]; + for (llvm::ArgumentInit *&Value : Values) { Init *ArgName = nullptr; if (Value->isPositional()) ArgName = TArgs[Value->getIndex()]; @@ -4398,7 +4397,7 @@ bool TGParser::CheckTemplateArgValues( assert((!isa(CastValue) || cast(CastValue)->getType()->typeIsA(ArgType)) && "result of template arg value cast has wrong type"); - Values[I] = Value->cloneWithValue(CastValue); + Value = Value->cloneWithValue(CastValue); } else { PrintFatalError(Loc, "Value specified for template argument '" + Arg->getNameInitAsString() + "' is of type " + diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 341cf51173ccc2..7115e387506001 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -11481,6 +11481,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( return std::make_pair(0U, &AArch64::ZPRRegClass); return std::make_pair(0U, nullptr); } + 
if (VT == MVT::Other) + break; uint64_t VTSize = VT.getFixedSizeInBits(); if (VTSize == 16) return std::make_pair(0U, &AArch64::FPR16RegClass); @@ -16121,6 +16123,24 @@ static Value *createTblShuffleForZExt(IRBuilderBase &Builder, Value *Op, return Result; } +static Value *createTblShuffleForSExt(IRBuilderBase &Builder, Value *Op, + FixedVectorType *DstTy, + bool IsLittleEndian) { + auto *SrcTy = cast(Op->getType()); + auto SrcWidth = cast(SrcTy->getElementType())->getBitWidth(); + auto DstWidth = cast(DstTy->getElementType())->getBitWidth(); + + SmallVector Mask; + if (!createTblShuffleMask(SrcWidth, DstWidth, SrcTy->getNumElements(), + !IsLittleEndian, Mask)) + return nullptr; + + auto *FirstEltZero = Builder.CreateInsertElement( + PoisonValue::get(SrcTy), Builder.getInt8(0), uint64_t(0)); + + return Builder.CreateShuffleVector(Op, FirstEltZero, Mask); +} + static void createTblForTrunc(TruncInst *TI, bool IsLittleEndian) { IRBuilder<> Builder(TI); SmallVector Parts; @@ -16301,14 +16321,29 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion( Value *ZExt = createTblShuffleForZExt( Builder, I->getOperand(0), FixedVectorType::getInteger(DstTy), FixedVectorType::getInteger(DstTy), Subtarget->isLittleEndian()); - if (!ZExt) - return false; + assert(ZExt && "Cannot fail for the i8 to float conversion"); auto *UI = Builder.CreateUIToFP(ZExt, DstTy); I->replaceAllUsesWith(UI); I->eraseFromParent(); return true; } + auto *SIToFP = dyn_cast(I); + if (SIToFP && SrcTy->getElementType()->isIntegerTy(8) && + DstTy->getElementType()->isFloatTy()) { + IRBuilder<> Builder(I); + auto *Shuffle = createTblShuffleForSExt(Builder, I->getOperand(0), + FixedVectorType::getInteger(DstTy), + Subtarget->isLittleEndian()); + assert(Shuffle && "Cannot fail for the i8 to float conversion"); + auto *Cast = Builder.CreateBitCast(Shuffle, VectorType::getInteger(DstTy)); + auto *AShr = Builder.CreateAShr(Cast, 24, "", true); + auto *SI = Builder.CreateSIToFP(AShr, DstTy); + 
I->replaceAllUsesWith(SI); + I->eraseFromParent(); + return true; + } + // Convert 'fptoui <(8|16) x float> to <(8|16) x i8>' to a wide fptoui // followed by a truncate lowered to using tbl.4. auto *FPToUI = dyn_cast(I); @@ -24833,7 +24868,7 @@ static SDValue tryCombineMULLWithUZP1(SDNode *N, } else if (isEssentiallyExtractHighSubvector(RHS) && LHS.getOpcode() == ISD::TRUNCATE) { TruncHigh = LHS; - if (LHS.getOpcode() == ISD::BITCAST) + if (RHS.getOpcode() == ISD::BITCAST) ExtractHigh = RHS.getOperand(0); else ExtractHigh = RHS; @@ -24862,6 +24897,7 @@ static SDValue tryCombineMULLWithUZP1(SDNode *N, // This dagcombine assumes the two extract_high uses same source vector in // order to detect the pair of the mull. If they have different source vector, // this code will not work. + // TODO: Should also try to look through a bitcast. bool HasFoundMULLow = true; SDValue ExtractHighSrcVec = ExtractHigh.getOperand(0); if (ExtractHighSrcVec->use_size() != 2) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 1e06d5fdc7562e..78c8bf1e323aba 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -3111,7 +3111,8 @@ def BRK : ExceptionGeneration<0b001, 0b00, "brk", def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">; def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">; def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>; -def HLT : ExceptionGeneration<0b010, 0b00, "hlt">; +def HLT : ExceptionGeneration<0b010, 0b00, "hlt", + [(int_aarch64_hlt timm32_0_65535:$imm)]>; def HVC : ExceptionGeneration<0b000, 0b10, "hvc">; def SMC : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>; def SVC : ExceptionGeneration<0b000, 0b01, "svc">; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 9e0860934f777e..6a0bec58127d5a 100644 --- 
a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -5551,7 +5551,8 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV, } if (CV->getSplatValue()) { - APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger()); + APInt DefBits = APInt::getSplat( + DstSize, CV->getUniqueInteger().trunc(DstTy.getScalarSizeInBits())); auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * { MachineInstr *NewOp; bool Inv = false; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index 5616d063f70bcc..ff7152192fe35f 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -42,6 +42,7 @@ #include "AArch64GenRegisterBankInfo.def" using namespace llvm; +static const unsigned CustomMappingID = 1; AArch64RegisterBankInfo::AArch64RegisterBankInfo( const TargetRegisterInfo &TRI) { @@ -424,6 +425,27 @@ void AArch64RegisterBankInfo::applyMappingImpl( MI.getOperand(2).setReg(Ext.getReg(0)); return applyDefaultMapping(OpdMapper); } + case AArch64::G_DUP: { + // Extend smaller gpr to 32-bits + assert(MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() < 32 && + "Expected sources smaller than 32-bits"); + Builder.setInsertPt(*MI.getParent(), MI.getIterator()); + + Register ConstReg; + auto ConstMI = MRI.getVRegDef(MI.getOperand(1).getReg()); + if (ConstMI->getOpcode() == TargetOpcode::G_CONSTANT) { + auto CstVal = ConstMI->getOperand(1).getCImm()->getValue(); + ConstReg = + Builder.buildConstant(LLT::scalar(32), CstVal.sext(32)).getReg(0); + ConstMI->eraseFromParent(); + } else { + ConstReg = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(1).getReg()) + .getReg(0); + } + MRI.setRegBank(ConstReg, getRegBank(AArch64::GPRRegBankID)); + MI.getOperand(1).setReg(ConstReg); + return applyDefaultMapping(OpdMapper); + } 
default: llvm_unreachable("Don't know how to handle that operation"); } @@ -792,8 +814,13 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank || onlyDefinesFP(*ScalarDef, MRI, TRI))) OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; - else + else { + if (ScalarTy.getSizeInBits() < 32 && + getRegBank(ScalarReg, MRI, TRI) == &AArch64::GPRRegBank) + // Calls applyMappingImpl() + MappingID = CustomMappingID; OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR}; + } break; } case TargetOpcode::G_TRUNC: { @@ -1015,7 +1042,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // type to i32 in applyMappingImpl. LLT Ty = MRI.getType(MI.getOperand(2).getReg()); if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16) - MappingID = 1; + // Calls applyMappingImpl() + MappingID = CustomMappingID; OpRegBankIdx[2] = PMI_FirstGPR; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp index 54968be677a377..35d17bbe2bd9af 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp @@ -393,7 +393,6 @@ Value *AMDGPUAtomicOptimizerImpl::buildReduction(IRBuilder<> &B, Value *V, Value *const Identity) const { Type *AtomicTy = V->getType(); - Type *IntNTy = B.getIntNTy(AtomicTy->getPrimitiveSizeInBits()); Module *M = B.GetInsertBlock()->getModule(); Function *UpdateDPP = Intrinsic::getDeclaration(M, Intrinsic::amdgcn_update_dpp, AtomicTy); @@ -409,34 +408,28 @@ Value *AMDGPUAtomicOptimizerImpl::buildReduction(IRBuilder<> &B, // Reduce within each pair of rows (i.e. 32 lanes). 
assert(ST->hasPermLaneX16()); - V = B.CreateBitCast(V, IntNTy); Value *Permlanex16Call = B.CreateIntrinsic( V->getType(), Intrinsic::amdgcn_permlanex16, {V, V, B.getInt32(-1), B.getInt32(-1), B.getFalse(), B.getFalse()}); - V = buildNonAtomicBinOp(B, Op, B.CreateBitCast(V, AtomicTy), - B.CreateBitCast(Permlanex16Call, AtomicTy)); + V = buildNonAtomicBinOp(B, Op, V, Permlanex16Call); if (ST->isWave32()) { return V; } if (ST->hasPermLane64()) { // Reduce across the upper and lower 32 lanes. - V = B.CreateBitCast(V, IntNTy); Value *Permlane64Call = B.CreateIntrinsic(V->getType(), Intrinsic::amdgcn_permlane64, V); - return buildNonAtomicBinOp(B, Op, B.CreateBitCast(V, AtomicTy), - B.CreateBitCast(Permlane64Call, AtomicTy)); + return buildNonAtomicBinOp(B, Op, V, Permlane64Call); } // Pick an arbitrary lane from 0..31 and an arbitrary lane from 32..63 and // combine them with a scalar operation. Function *ReadLane = - Intrinsic::getDeclaration(M, Intrinsic::amdgcn_readlane, B.getInt32Ty()); - V = B.CreateBitCast(V, IntNTy); + Intrinsic::getDeclaration(M, Intrinsic::amdgcn_readlane, AtomicTy); Value *Lane0 = B.CreateCall(ReadLane, {V, B.getInt32(0)}); Value *Lane32 = B.CreateCall(ReadLane, {V, B.getInt32(32)}); - return buildNonAtomicBinOp(B, Op, B.CreateBitCast(Lane0, AtomicTy), - B.CreateBitCast(Lane32, AtomicTy)); + return buildNonAtomicBinOp(B, Op, Lane0, Lane32); } // Use the builder to create an inclusive scan of V across the wavefront, with @@ -445,8 +438,6 @@ Value *AMDGPUAtomicOptimizerImpl::buildScan(IRBuilder<> &B, AtomicRMWInst::BinOp Op, Value *V, Value *Identity) const { Type *AtomicTy = V->getType(); - Type *IntNTy = B.getIntNTy(AtomicTy->getPrimitiveSizeInBits()); - Module *M = B.GetInsertBlock()->getModule(); Function *UpdateDPP = Intrinsic::getDeclaration(M, Intrinsic::amdgcn_update_dpp, AtomicTy); @@ -477,20 +468,17 @@ Value *AMDGPUAtomicOptimizerImpl::buildScan(IRBuilder<> &B, // Combine lane 15 into lanes 16..31 (and, for wave 64, lane 47 into lanes 
// 48..63). assert(ST->hasPermLaneX16()); - V = B.CreateBitCast(V, IntNTy); Value *PermX = B.CreateIntrinsic( V->getType(), Intrinsic::amdgcn_permlanex16, {V, V, B.getInt32(-1), B.getInt32(-1), B.getFalse(), B.getFalse()}); - Value *UpdateDPPCall = - B.CreateCall(UpdateDPP, {Identity, B.CreateBitCast(PermX, AtomicTy), - B.getInt32(DPP::QUAD_PERM_ID), B.getInt32(0xa), - B.getInt32(0xf), B.getFalse()}); - V = buildNonAtomicBinOp(B, Op, B.CreateBitCast(V, AtomicTy), UpdateDPPCall); + Value *UpdateDPPCall = B.CreateCall( + UpdateDPP, {Identity, PermX, B.getInt32(DPP::QUAD_PERM_ID), + B.getInt32(0xa), B.getInt32(0xf), B.getFalse()}); + V = buildNonAtomicBinOp(B, Op, V, UpdateDPPCall); if (!ST->isWave32()) { // Combine lane 31 into lanes 32..63. - V = B.CreateBitCast(V, IntNTy); Value *const Lane31 = B.CreateIntrinsic( V->getType(), Intrinsic::amdgcn_readlane, {V, B.getInt32(31)}); @@ -498,8 +486,7 @@ Value *AMDGPUAtomicOptimizerImpl::buildScan(IRBuilder<> &B, UpdateDPP, {Identity, Lane31, B.getInt32(DPP::QUAD_PERM_ID), B.getInt32(0xc), B.getInt32(0xf), B.getFalse()}); - V = buildNonAtomicBinOp(B, Op, B.CreateBitCast(V, AtomicTy), - UpdateDPPCall); + V = buildNonAtomicBinOp(B, Op, V, UpdateDPPCall); } } return V; @@ -510,8 +497,6 @@ Value *AMDGPUAtomicOptimizerImpl::buildScan(IRBuilder<> &B, Value *AMDGPUAtomicOptimizerImpl::buildShiftRight(IRBuilder<> &B, Value *V, Value *Identity) const { Type *AtomicTy = V->getType(); - Type *IntNTy = B.getIntNTy(AtomicTy->getPrimitiveSizeInBits()); - Module *M = B.GetInsertBlock()->getModule(); Function *UpdateDPP = Intrinsic::getDeclaration(M, Intrinsic::amdgcn_update_dpp, AtomicTy); @@ -521,10 +506,10 @@ Value *AMDGPUAtomicOptimizerImpl::buildShiftRight(IRBuilder<> &B, Value *V, {Identity, V, B.getInt32(DPP::WAVE_SHR1), B.getInt32(0xf), B.getInt32(0xf), B.getFalse()}); } else { - Function *ReadLane = Intrinsic::getDeclaration( - M, Intrinsic::amdgcn_readlane, B.getInt32Ty()); - Function *WriteLane = Intrinsic::getDeclaration( - M, 
Intrinsic::amdgcn_writelane, B.getInt32Ty()); + Function *ReadLane = + Intrinsic::getDeclaration(M, Intrinsic::amdgcn_readlane, AtomicTy); + Function *WriteLane = + Intrinsic::getDeclaration(M, Intrinsic::amdgcn_writelane, AtomicTy); // On GFX10 all DPP operations are confined to a single row. To get cross- // row operations we have to use permlane or readlane. @@ -534,24 +519,19 @@ Value *AMDGPUAtomicOptimizerImpl::buildShiftRight(IRBuilder<> &B, Value *V, B.getInt32(0xf), B.getInt32(0xf), B.getFalse()}); // Copy the old lane 15 to the new lane 16. - V = B.CreateCall( - WriteLane, - {B.CreateCall(ReadLane, {B.CreateBitCast(Old, IntNTy), B.getInt32(15)}), - B.getInt32(16), B.CreateBitCast(V, IntNTy)}); - V = B.CreateBitCast(V, AtomicTy); + V = B.CreateCall(WriteLane, {B.CreateCall(ReadLane, {Old, B.getInt32(15)}), + B.getInt32(16), V}); + if (!ST->isWave32()) { // Copy the old lane 31 to the new lane 32. - V = B.CreateBitCast(V, IntNTy); - V = B.CreateCall(WriteLane, - {B.CreateCall(ReadLane, {B.CreateBitCast(Old, IntNTy), - B.getInt32(31)}), - B.getInt32(32), V}); + V = B.CreateCall( + WriteLane, + {B.CreateCall(ReadLane, {Old, B.getInt32(31)}), B.getInt32(32), V}); // Copy the old lane 47 to the new lane 48. 
V = B.CreateCall( WriteLane, {B.CreateCall(ReadLane, {Old, B.getInt32(47)}), B.getInt32(48), V}); - V = B.CreateBitCast(V, AtomicTy); } } @@ -591,24 +571,18 @@ std::pair AMDGPUAtomicOptimizerImpl::buildScanIteratively( auto *FF1 = B.CreateIntrinsic(Intrinsic::cttz, WaveTy, {ActiveBits, B.getTrue()}); - Type *IntNTy = B.getIntNTy(Ty->getPrimitiveSizeInBits()); - auto *LaneIdxInt = B.CreateTrunc(FF1, IntNTy); + auto *LaneIdxInt = B.CreateTrunc(FF1, B.getInt32Ty()); // Get the value required for atomic operation - V = B.CreateBitCast(V, IntNTy); Value *LaneValue = B.CreateIntrinsic(V->getType(), Intrinsic::amdgcn_readlane, {V, LaneIdxInt}); - LaneValue = B.CreateBitCast(LaneValue, Ty); // Perform writelane if intermediate scan results are required later in the // kernel computations Value *OldValue = nullptr; if (NeedResult) { - OldValue = - B.CreateIntrinsic(IntNTy, Intrinsic::amdgcn_writelane, - {B.CreateBitCast(Accumulator, IntNTy), LaneIdxInt, - B.CreateBitCast(OldValuePhi, IntNTy)}); - OldValue = B.CreateBitCast(OldValue, Ty); + OldValue = B.CreateIntrinsic(V->getType(), Intrinsic::amdgcn_writelane, + {Accumulator, LaneIdxInt, OldValuePhi}); OldValuePhi->addIncoming(OldValue, ComputeLoop); } @@ -710,10 +684,8 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I, Type *const Ty = I.getType(); Type *Int32Ty = B.getInt32Ty(); - Type *IntNTy = B.getIntNTy(Ty->getPrimitiveSizeInBits()); bool isAtomicFloatingPointTy = Ty->isFloatingPointTy(); - const unsigned TyBitWidth = DL->getTypeSizeInBits(Ty); - auto *const VecTy = FixedVectorType::get(Int32Ty, 2); + [[maybe_unused]] const unsigned TyBitWidth = DL->getTypeSizeInBits(Ty); // This is the value in the atomic operation we need to combine in order to // reduce the number of atomic operations. 
@@ -768,13 +740,8 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I, if (ScanImpl == ScanOptions::DPP) { // First we need to set all inactive invocations to the identity value, so // that they can correctly contribute to the final result. - V = B.CreateBitCast(V, IntNTy); - Identity = B.CreateBitCast(Identity, IntNTy); - NewV = B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, IntNTy, - {V, Identity}); - NewV = B.CreateBitCast(NewV, Ty); - V = B.CreateBitCast(V, Ty); - Identity = B.CreateBitCast(Identity, Ty); + NewV = + B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {V, Identity}); if (!NeedResult && ST->hasPermLaneX16()) { // On GFX10 the permlanex16 instruction helps us build a reduction // without too many readlanes and writelanes, which are generally bad @@ -789,10 +756,8 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I, // which we will provide to the atomic operation. Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1); assert(TyBitWidth == 32); - NewV = B.CreateBitCast(NewV, IntNTy); - NewV = B.CreateIntrinsic(IntNTy, Intrinsic::amdgcn_readlane, + NewV = B.CreateIntrinsic(Ty, Intrinsic::amdgcn_readlane, {NewV, LastLaneIdx}); - NewV = B.CreateBitCast(NewV, Ty); } // Finally mark the readlanes in the WWM section. NewV = B.CreateIntrinsic(Intrinsic::amdgcn_strict_wwm, Ty, NewV); @@ -931,30 +896,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I, // lane) to all other lanes in the wavefront. We use an intrinsic for this, // but have to handle 64-bit broadcasts with two calls to this intrinsic. 
Value *BroadcastI = nullptr; - - if (TyBitWidth == 64) { - Value *CastedPhi = B.CreateBitCast(PHI, IntNTy); - Value *const ExtractLo = B.CreateTrunc(CastedPhi, Int32Ty); - Value *const ExtractHi = - B.CreateTrunc(B.CreateLShr(CastedPhi, 32), Int32Ty); - CallInst *const ReadFirstLaneLo = B.CreateIntrinsic( - Int32Ty, Intrinsic::amdgcn_readfirstlane, ExtractLo); - CallInst *const ReadFirstLaneHi = B.CreateIntrinsic( - Int32Ty, Intrinsic::amdgcn_readfirstlane, ExtractHi); - Value *const PartialInsert = B.CreateInsertElement( - PoisonValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0)); - Value *const Insert = - B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1)); - BroadcastI = B.CreateBitCast(Insert, Ty); - } else if (TyBitWidth == 32) { - Value *CastedPhi = B.CreateBitCast(PHI, IntNTy); - BroadcastI = - B.CreateIntrinsic(IntNTy, Intrinsic::amdgcn_readfirstlane, CastedPhi); - BroadcastI = B.CreateBitCast(BroadcastI, Ty); - - } else { - llvm_unreachable("Unhandled atomic bit width"); - } + BroadcastI = B.CreateIntrinsic(Ty, Intrinsic::amdgcn_readfirstlane, PHI); // Now that we have the result of our single atomic operation, we need to // get our individual lane's slice into the result. We use the lane offset diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 0c0e9163fc6ef6..695b2f246a778d 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -2366,8 +2366,16 @@ const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id, int64_t Val) { MCContext &Ctx = getContext(); MCSymbol *Sym = Ctx.getOrCreateSymbol(Id); - assert(!Sym->isVariable()); - Sym->setVariableValue(MCConstantExpr::create(Val, Ctx)); + // Note: only set value to Val on a new symbol in case an dissassembler + // has already been initialized in this context. 
+ if (!Sym->isVariable()) { + Sym->setVariableValue(MCConstantExpr::create(Val, Ctx)); + } else { + int64_t Res = ~Val; + bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res); + if (!Valid || Res != Val) + Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id); + } return MCSymbolRefExpr::create(Sym, Ctx); } diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index e1468bf850cd79..15fd36ebd10a40 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -15,7 +15,8 @@ // - MIMGEncGfx10Default: gfx10 default (non-NSA) encoding // - MIMGEncGfx10NSA: gfx10 NSA encoding // - MIMGEncGfx11Default: gfx11 default (non-NSA) encoding -// - MIMGEncGfx11NSA: gfx11 NSA encoding +// - MIMGEncGfx11NSA: gfx11 partial NSA encoding +// - MIMGEncGfx12: gfx12 encoding (partial NSA) class MIMGEncoding; def MIMGEncGfx6 : MIMGEncoding; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index a3fd72fe1351c9..cc36e8b33472b4 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -14798,7 +14798,7 @@ static SDValue PerformORCombine(SDNode *N, N0->getOperand(1), N0->getOperand(0), N1->getOperand(0)); - return DAG.getNode(ISD::BITCAST, dl, VT, Result); + return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Result); } } } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp index 10bf2360cb54d9..99f57f47835abd 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp @@ -85,10 +85,10 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI, } if (RiscvAbiAttr && STI.hasFeature(RISCV::FeatureStdExtA)) { - unsigned AtomicABITag = - static_cast(STI.hasFeature(RISCV::FeatureTrailingSeqCstFence) - ? 
RISCVAttrs::RISCVAtomicAbiTag::A6S - : RISCVAttrs::RISCVAtomicAbiTag::A6C); + unsigned AtomicABITag = static_cast( + STI.hasFeature(RISCV::FeatureNoTrailingSeqCstFence) + ? RISCVAttrs::RISCVAtomicAbiTag::A6C + : RISCVAttrs::RISCVAtomicAbiTag::A6S); emitAttribute(RISCVAttrs::ATOMIC_ABI, AtomicABITag); } } diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 697a22b30dfcca..74938ef96ce68b 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1244,10 +1244,10 @@ foreach i = {1-31} in def FeatureSaveRestore : SubtargetFeature<"save-restore", "EnableSaveRestore", "true", "Enable save/restore.">; -def FeatureTrailingSeqCstFence : SubtargetFeature<"seq-cst-trailing-fence", - "EnableSeqCstTrailingFence", - "true", - "Enable trailing fence for seq-cst store.">; +def FeatureNoTrailingSeqCstFence : SubtargetFeature<"no-trailing-seq-cst-fence", + "EnableTrailingSeqCstFence", + "false", + "Disable trailing fence for seq-cst store.">; def FeatureUnalignedScalarMem : SubtargetFeature<"unaligned-scalar-mem", "EnableUnalignedScalarMem", diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 7bdd4f8f4dbc30..ce6a396e9ced9a 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3737,6 +3737,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { Info = RISCV::getMaskedPseudoInfo(TrueOpc); IsMasked = true; } + assert(!(IsMasked && !HasTiedDest) && "Expected tied dest"); if (!Info) return false; @@ -3756,11 +3757,10 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { // If True is masked then the vmerge must have an all 1s mask, since we're // going to keep the mask from True. - if (IsMasked) { - assert(HasTiedDest && "Expected tied dest"); + if (IsMasked && Mask) { // FIXME: Support mask agnostic True instruction which would have an // undef merge operand. 
- if (Mask && !usesAllOnesMask(Mask, Glue)) + if (!usesAllOnesMask(Mask, Glue)) return false; } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 1e37f2c3b9c597..ef76705d8f6628 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1329,6 +1329,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); // FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR. setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + // We need to custom legalize f16 build vectors if Zfhmin isn't + // available. + if (!Subtarget.hasStdExtZfhminOrZhinxmin()) + setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom); MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); // Don't promote f16 vector operations to f32 if f32 vector type is // not legal. @@ -3901,11 +3905,86 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, return SDValue(); } +/// Double the element size of the build vector to reduce the number +/// of vslide1down in the build vector chain. In the worst case, this +/// trades three scalar operations for 1 vector operation. Scalar +/// operations are generally lower latency, and for out-of-order cores +/// we also benefit from additional parallelism. +static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + SDLoc DL(Op); + MVT VT = Op.getSimpleValueType(); + assert(VT.isFixedLengthVector() && "Unexpected vector!"); + MVT ElemVT = VT.getVectorElementType(); + if (!ElemVT.isInteger()) + return SDValue(); + + // TODO: Relax these architectural restrictions, possibly with costing + // of the actual instructions required. 
+ if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba()) + return SDValue(); + + unsigned NumElts = VT.getVectorNumElements(); + unsigned ElemSizeInBits = ElemVT.getSizeInBits(); + if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) || + NumElts % 2 != 0) + return SDValue(); + + // Produce [B,A] packed into a type twice as wide. Note that all + // scalars are XLenVT, possibly masked (see below). + MVT XLenVT = Subtarget.getXLenVT(); + auto pack = [&](SDValue A, SDValue B) { + // Bias the scheduling of the inserted operations to near the + // definition of the element - this tends to reduce register + // pressure overall. + SDLoc ElemDL(B); + SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT); + return DAG.getNode(ISD::OR, ElemDL, XLenVT, A, + DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt)); + }; + + SDValue Mask = DAG.getConstant( + APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT); + SmallVector NewOperands; + NewOperands.reserve(NumElts / 2); + for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2) { + SDValue A = Op.getOperand(i); + SDValue B = Op.getOperand(i + 1); + // Bias the scheduling of the inserted operations to near the + // definition of the element - this tends to reduce register + // pressure overall. + A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask); + B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask); + NewOperands.push_back(pack(A, B)); + } + assert(NumElts == NewOperands.size() * 2); + MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2); + MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2); + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getBuildVector(WideVecVT, DL, NewOperands)); +} + +// Convert to an vXf16 build_vector to vXi16 with bitcasts. 
+static SDValue lowerBUILD_VECTORvXf16(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + MVT IVT = VT.changeVectorElementType(MVT::i16); + SmallVector NewOps(Op.getNumOperands()); + for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) + NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I)); + SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), IVT, NewOps); + return DAG.getBitcast(VT, Res); +} + static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); assert(VT.isFixedLengthVector() && "Unexpected vector!"); + // If we don't have scalar f16, we need to bitcast to an i16 vector. + if (VT.getVectorElementType() == MVT::f16 && + !Subtarget.hasStdExtZfhminOrZhinxmin()) + return lowerBUILD_VECTORvXf16(Op, DAG); + if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) return lowerBuildVectorOfConstants(Op, DAG, Subtarget); @@ -3986,6 +4065,13 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, return convertFromScalableVector(VT, Vec, DAG, Subtarget); } + // If we're about to resort to vslide1down (or stack usage), pack our + // elements into the widest scalar type we can. This will force a VL/VTYPE + // toggle, but reduces the critical path, the number of vslide1down ops + // required, and possibly enables scalar folds of the values. + if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget)) + return Res; + // For m1 vectors, if we have non-undef values in both halves of our vector, // split the vector into low and high halves, build them separately, then // use a vselect to combine them. 
For long vectors, this cuts the critical @@ -20796,7 +20882,7 @@ Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder, if (isa(Inst) && isAcquireOrStronger(Ord)) return Builder.CreateFence(AtomicOrdering::Acquire); - if (Subtarget.enableSeqCstTrailingFence() && isa(Inst) && + if (Subtarget.enableTrailingSeqCstFence() && isa(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent); return nullptr; diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt index 44a54c8ec62cbc..9553a8619feb51 100644 --- a/llvm/lib/Target/X86/CMakeLists.txt +++ b/llvm/lib/Target/X86/CMakeLists.txt @@ -83,6 +83,7 @@ set(sources X86TargetTransformInfo.cpp X86VZeroUpper.cpp X86WinEHState.cpp + X86WinFixupBufferSecurityCheck.cpp X86InsertWait.cpp GISel/X86CallLowering.cpp GISel/X86InstructionSelector.cpp diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index a89408bb79b065..b24b8acce64120 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -150,25 +150,21 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) { case X86::AND16ri8: case X86::AND16rm: case X86::AND16rr: - case X86::AND16rr_REV: case X86::AND32i32: case X86::AND32ri: case X86::AND32ri8: case X86::AND32rm: case X86::AND32rr: - case X86::AND32rr_REV: case X86::AND64i32: case X86::AND64ri32: case X86::AND64ri8: case X86::AND64rm: case X86::AND64rr: - case X86::AND64rr_REV: case X86::AND8i8: case X86::AND8ri: case X86::AND8ri8: case X86::AND8rm: case X86::AND8rr: - case X86::AND8rr_REV: return FirstMacroFusionInstKind::And; // CMP case X86::CMP16i16: @@ -177,28 +173,24 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) { case X86::CMP16ri8: case X86::CMP16rm: case X86::CMP16rr: - case X86::CMP16rr_REV: case X86::CMP32i32: case X86::CMP32mr: case X86::CMP32ri: case X86::CMP32ri8: case 
X86::CMP32rm: case X86::CMP32rr: - case X86::CMP32rr_REV: case X86::CMP64i32: case X86::CMP64mr: case X86::CMP64ri32: case X86::CMP64ri8: case X86::CMP64rm: case X86::CMP64rr: - case X86::CMP64rr_REV: case X86::CMP8i8: case X86::CMP8mr: case X86::CMP8ri: case X86::CMP8ri8: case X86::CMP8rm: case X86::CMP8rr: - case X86::CMP8rr_REV: return FirstMacroFusionInstKind::Cmp; // ADD case X86::ADD16i16: @@ -206,50 +198,42 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) { case X86::ADD16ri8: case X86::ADD16rm: case X86::ADD16rr: - case X86::ADD16rr_REV: case X86::ADD32i32: case X86::ADD32ri: case X86::ADD32ri8: case X86::ADD32rm: case X86::ADD32rr: - case X86::ADD32rr_REV: case X86::ADD64i32: case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD64rm: case X86::ADD64rr: - case X86::ADD64rr_REV: case X86::ADD8i8: case X86::ADD8ri: case X86::ADD8ri8: case X86::ADD8rm: case X86::ADD8rr: - case X86::ADD8rr_REV: // SUB case X86::SUB16i16: case X86::SUB16ri: case X86::SUB16ri8: case X86::SUB16rm: case X86::SUB16rr: - case X86::SUB16rr_REV: case X86::SUB32i32: case X86::SUB32ri: case X86::SUB32ri8: case X86::SUB32rm: case X86::SUB32rr: - case X86::SUB32rr_REV: case X86::SUB64i32: case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB64rm: case X86::SUB64rr: - case X86::SUB64rr_REV: case X86::SUB8i8: case X86::SUB8ri: case X86::SUB8ri8: case X86::SUB8rm: case X86::SUB8rr: - case X86::SUB8rr_REV: return FirstMacroFusionInstKind::AddSub; // INC case X86::INC16r: diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index fdb9e4cad5e897..d6e0d5e3a3b2c6 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -73,6 +73,9 @@ FunctionPass *createX86OptimizeLEAs(); /// Return a pass that transforms setcc + movzx pairs into xor + setcc. 
FunctionPass *createX86FixupSetCC(); +/// Return a pass that transforms the inline buffer security check into a separate basic block. +FunctionPass *createX86WinFixupBufferSecurityCheckPass(); + /// Return a pass that avoids creating store forward block issues in the hardware. FunctionPass *createX86AvoidStoreForwardingBlocks(); @@ -186,6 +189,7 @@ void initializeX86ExpandPseudoPass(PassRegistry &); void initializeX86FastPreTileConfigPass(PassRegistry &); void initializeX86FastTileConfigPass(PassRegistry &); void initializeX86FixupSetCCPassPass(PassRegistry &); +void initializeX86WinFixupBufferSecurityCheckPassPass(PassRegistry &); void initializeX86FlagsCopyLoweringPassPass(PassRegistry &); void initializeX86LoadValueInjectionLoadHardeningPassPass(PassRegistry &); void initializeX86LoadValueInjectionRetHardeningPassPass(PassRegistry &); diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 68b78c7c44771f..fdd7d5f1ee0e73 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -749,6 +749,11 @@ def TuningUseGLMDivSqrtCosts : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true", "Use Goldmont specific floating point div/sqrt costs">; +// Starting with the Redwood Cove architecture, branches have a branch-taken hint +// (i.e., instruction prefix 3EH). +def TuningBranchHint: SubtargetFeature<"branch-hint", "HasBranchHint", "true", + "Target has branch hint feature">; + //===----------------------------------------------------------------------===// // X86 CPU Families // TODO: Remove these - use general tuning features to determine codegen.
@@ -1124,6 +1129,8 @@ def ProcessorFeatures { FeaturePREFETCHI]; list GNRFeatures = !listconcat(SPRFeatures, GNRAdditionalFeatures); + list GNRAdditionalTuning = [TuningBranchHint]; + list GNRTuning = !listconcat(SPRTuning, GNRAdditionalTuning); // Graniterapids D list GNRDAdditionalFeatures = [FeatureAMXCOMPLEX]; @@ -1815,12 +1822,12 @@ def : ProcModel<"pantherlake", AlderlakePModel, def : ProcModel<"clearwaterforest", AlderlakePModel, ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>; def : ProcModel<"graniterapids", SapphireRapidsModel, - ProcessorFeatures.GNRFeatures, ProcessorFeatures.SPRTuning>; + ProcessorFeatures.GNRFeatures, ProcessorFeatures.GNRTuning>; def : ProcModel<"emeraldrapids", SapphireRapidsModel, - ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>; + ProcessorFeatures.SPRFeatures, ProcessorFeatures.GNRTuning>; foreach P = ["graniterapids-d", "graniterapids_d"] in { def : ProcModel; + ProcessorFeatures.GNRDFeatures, ProcessorFeatures.GNRTuning>; } // AMD CPUs. 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e03edf92cc4780..e116285d043c0c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -41339,24 +41339,24 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL, SDValue V1 = peekThroughBitcasts(N.getOperand(0)); SDValue V2 = peekThroughBitcasts(N.getOperand(2)); MVT SVT = V1.getSimpleValueType(); - MVT NVT = VT.getDoubleNumVectorElementsVT(); - if ((NVT.is256BitVector() || - (NVT.is512BitVector() && Subtarget.hasEVEX512())) && - V1.getOpcode() == ISD::EXTRACT_SUBVECTOR && + if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR && V1.getConstantOperandVal(1) == 0 && V2.getOpcode() == ISD::EXTRACT_SUBVECTOR && V2.getConstantOperandVal(1) == SVT.getVectorNumElements() && V1.getOperand(0) == V2.getOperand(0)) { - SDValue Mask = - DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NVT, DAG.getUNDEF(NVT), - N.getOperand(1), DAG.getIntPtrConstant(0, DL)); - return DAG.getNode( - ISD::EXTRACT_SUBVECTOR, DL, VT, - DAG.getNode(X86ISD::VPERMV, DL, NVT, Mask, - DAG.getBitcast(NVT, V1.getOperand(0))), - DAG.getIntPtrConstant(0, DL)); + EVT NVT = V1.getOperand(0).getValueType(); + if (NVT.is256BitVector() || + (NVT.is512BitVector() && Subtarget.hasEVEX512())) { + MVT WideVT = MVT::getVectorVT( + VT.getScalarType(), NVT.getSizeInBits() / VT.getScalarSizeInBits()); + SDValue Mask = widenSubVector(N.getOperand(1), false, Subtarget, DAG, + DL, WideVT.getSizeInBits()); + SDValue Perm = DAG.getNode(X86ISD::VPERMV, DL, WideVT, Mask, + DAG.getBitcast(WideVT, V1.getOperand(0))); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm, + DAG.getIntPtrConstant(0, DL)); + } } - return SDValue(); } default: diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 00f58f9432e4d7..df20ecd1b9b214 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -25,6 +25,7 @@ 
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" @@ -54,6 +55,14 @@ using namespace llvm; +static cl::opt EnableBranchHint("enable-branch-hint", + cl::desc("Enable branch hint."), + cl::init(false), cl::Hidden); +static cl::opt BranchHintProbabilityThreshold( + "branch-hint-probability-threshold", + cl::desc("The probability threshold of enabling branch hint."), + cl::init(50), cl::Hidden); + namespace { /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. @@ -2444,6 +2453,21 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) { if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11)) EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX)); break; + case X86::JCC_1: + // Two instruction prefixes (2EH for branch not-taken and 3EH for branch + // taken) are used as branch hints. Here we add branch taken prefix for + // jump instruction with higher probability than threshold. 
+ if (getSubtarget().hasBranchHint() && EnableBranchHint) { + const MachineBranchProbabilityInfo *MBPI = + &getAnalysis().getMBPI(); + MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); + BranchProbability EdgeProb = + MBPI->getEdgeProbability(MI->getParent(), DestBB); + BranchProbability Threshold(BranchHintProbabilityThreshold, 100); + if (EdgeProb > Threshold) + EmitAndCountInstruction(MCInstBuilder(X86::DS_PREFIX)); + } + break; } MCInst TmpInst; diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index d4e642c7df9cf9..4c77f40fd32a3a 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -550,6 +550,7 @@ bool X86PassConfig::addPreISel() { void X86PassConfig::addPreRegAlloc() { if (getOptLevel() != CodeGenOptLevel::None) { addPass(&LiveRangeShrinkID); + addPass(createX86WinFixupBufferSecurityCheckPass()); addPass(createX86FixupSetCC()); addPass(createX86OptimizeLEAs()); addPass(createX86CallFrameOptimization()); diff --git a/llvm/lib/Target/X86/X86WinFixupBufferSecurityCheck.cpp b/llvm/lib/Target/X86/X86WinFixupBufferSecurityCheck.cpp new file mode 100644 index 00000000000000..7101b0bd70312a --- /dev/null +++ b/llvm/lib/Target/X86/X86WinFixupBufferSecurityCheck.cpp @@ -0,0 +1,247 @@ +//===- X86WinFixupBufferSecurityCheck.cpp Fix Buffer Security Check Call -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Buffer Security Check implementation inserts windows specific callback into +// code. On windows, __security_check_cookie call gets call everytime function +// is return without fixup. 
Since this function is defined in the runtime library, +// it incurs the cost of a call into the DLL, which simply does a comparison and returns most +// of the time. With the fixup, we selectively call into the DLL only if the comparison fails. +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86FrameLowering.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Module.h" +#include + +using namespace llvm; + +#define DEBUG_TYPE "x86-win-fixup-bscheck" + +namespace { + +class X86WinFixupBufferSecurityCheckPass : public MachineFunctionPass { +public: + static char ID; + + X86WinFixupBufferSecurityCheckPass() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "X86 Windows Fixup Buffer Security Check"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + std::pair + getSecurityCheckerBasicBlock(MachineFunction &MF); + + void getGuardCheckSequence(MachineBasicBlock *CurMBB, MachineInstr *CheckCall, + MachineInstr *SeqMI[5]); + + void SplitBasicBlock(MachineBasicBlock *CurMBB, MachineBasicBlock *NewRetMBB, + MachineBasicBlock::iterator SplitIt); + + void FinishBlock(MachineBasicBlock *MBB); + + void FinishFunction(MachineBasicBlock *FailMBB, MachineBasicBlock *NewRetMBB); + + std::pair + CreateFailCheckSequence(MachineBasicBlock *CurMBB, MachineBasicBlock *FailMBB, + MachineInstr *SeqMI[5]); +}; +} // end anonymous namespace + +char X86WinFixupBufferSecurityCheckPass::ID = 0; + +INITIALIZE_PASS(X86WinFixupBufferSecurityCheckPass, DEBUG_TYPE, DEBUG_TYPE, + false, false) + +FunctionPass *llvm::createX86WinFixupBufferSecurityCheckPass() { + return new X86WinFixupBufferSecurityCheckPass(); +} + +void X86WinFixupBufferSecurityCheckPass::SplitBasicBlock( + MachineBasicBlock *CurMBB,
MachineBasicBlock *NewRetMBB, + MachineBasicBlock::iterator SplitIt) { + NewRetMBB->splice(NewRetMBB->end(), CurMBB, SplitIt, CurMBB->end()); +} + +std::pair +X86WinFixupBufferSecurityCheckPass::getSecurityCheckerBasicBlock( + MachineFunction &MF) { + MachineBasicBlock::reverse_iterator RBegin, REnd; + + for (auto &MBB : llvm::reverse(MF)) { + for (RBegin = MBB.rbegin(), REnd = MBB.rend(); RBegin != REnd; ++RBegin) { + auto &MI = *RBegin; + if (MI.getOpcode() == X86::CALL64pcrel32 && + MI.getNumExplicitOperands() == 1) { + auto MO = MI.getOperand(0); + if (MO.isGlobal()) { + auto Callee = dyn_cast(MO.getGlobal()); + if (Callee && Callee->getName() == "__security_check_cookie") { + return std::make_pair(&MBB, &MI); + break; + } + } + } + } + } + return std::make_pair(nullptr, nullptr); +} + +void X86WinFixupBufferSecurityCheckPass::getGuardCheckSequence( + MachineBasicBlock *CurMBB, MachineInstr *CheckCall, + MachineInstr *SeqMI[5]) { + + MachineBasicBlock::iterator UIt(CheckCall); + MachineBasicBlock::reverse_iterator DIt(CheckCall); + // Seq From StackUp to Stack Down Is fixed. 
+ // ADJCALLSTACKUP64 + ++UIt; + SeqMI[4] = &*UIt; + + // CALL __security_check_cookie + SeqMI[3] = CheckCall; + + // COPY function slot cookie + ++DIt; + SeqMI[2] = &*DIt; + + // ADJCALLSTACKDOWN64 + ++DIt; + SeqMI[1] = &*DIt; + + MachineBasicBlock::reverse_iterator XIt(SeqMI[1]); + for (; XIt != CurMBB->rbegin(); ++XIt) { + auto &CI = *XIt; + if ((CI.getOpcode() == X86::XOR64_FP) || (CI.getOpcode() == X86::XOR32_FP)) + break; + } + SeqMI[0] = &*XIt; +} + +std::pair +X86WinFixupBufferSecurityCheckPass::CreateFailCheckSequence( + MachineBasicBlock *CurMBB, MachineBasicBlock *FailMBB, + MachineInstr *SeqMI[5]) { + + auto MF = CurMBB->getParent(); + + Module &M = *MF->getFunction().getParent(); + GlobalVariable *GV = M.getGlobalVariable("__security_cookie"); + assert(GV && " Security Cookie was not installed!"); + + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + + MachineInstr *GuardXor = SeqMI[0]; + MachineBasicBlock::iterator InsertPt(GuardXor); + ++InsertPt; + + // Compare security_Cookie with XOR_Val, if not same, we have violation + auto CMI = BuildMI(*CurMBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rm)) + .addReg(GuardXor->getOperand(0).getReg()) + .addReg(X86::RIP) + .addImm(1) + .addReg(X86::NoRegister) + .addGlobalAddress(GV) + .addReg(X86::NoRegister); + + BuildMI(*CurMBB, InsertPt, DebugLoc(), TII->get(X86::JCC_1)) + .addMBB(FailMBB) + .addImm(X86::COND_NE); + + auto JMI = BuildMI(*CurMBB, InsertPt, DebugLoc(), TII->get(X86::JMP_1)); + + return std::make_pair(CMI.getInstr(), JMI.getInstr()); +} + +void X86WinFixupBufferSecurityCheckPass::FinishBlock(MachineBasicBlock *MBB) { + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *MBB); +} + +void X86WinFixupBufferSecurityCheckPass::FinishFunction( + MachineBasicBlock *FailMBB, MachineBasicBlock *NewRetMBB) { + FailMBB->getParent()->RenumberBlocks(); + // FailMBB includes call to MSCV RT where is __security_check_cookie + // function is called. 
This function uses regcall and it expects the cookie + // value from the stack slot (even if this is modified). + // Before going further we recompute the live-ins for this block to make sure + // they are live and provided. + FinishBlock(FailMBB); + FinishBlock(NewRetMBB); +} + +bool X86WinFixupBufferSecurityCheckPass::runOnMachineFunction( + MachineFunction &MF) { + bool Changed = false; + const X86Subtarget &STI = MF.getSubtarget(); + + if (!(STI.isTargetWindowsItanium() || STI.isTargetWindowsMSVC())) + return Changed; + + // Check if security cookie was installed or not + Module &M = *MF.getFunction().getParent(); + GlobalVariable *GV = M.getGlobalVariable("__security_cookie"); + if (!GV) + return Changed; + + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + + // Check if security check cookie was installed or not + auto [CurMBB, CheckCall] = getSecurityCheckerBasicBlock(MF); + + if (!CheckCall) + return Changed; + + MachineBasicBlock *FailMBB = MF.CreateMachineBasicBlock(); + MachineBasicBlock *NewRetMBB = MF.CreateMachineBasicBlock(); + + MF.insert(MF.end(), NewRetMBB); + MF.insert(MF.end(), FailMBB); + + MachineInstr *SeqMI[5]; + getGuardCheckSequence(CurMBB, CheckCall, SeqMI); + // MachineInstr * GuardXor = SeqMI[0]; + + auto FailSeqRange = CreateFailCheckSequence(CurMBB, FailMBB, SeqMI); + MachineInstrBuilder JMI(MF, FailSeqRange.second); + + // After inserting JMP_1, we cannot have two terminators + // in the same block, so split CurMBB after JMP_1 + MachineBasicBlock::iterator SplitIt(SeqMI[4]); + ++SplitIt; + SplitBasicBlock(CurMBB, NewRetMBB, SplitIt); + + // Fill in the failure routine; move the fail check sequence from CurMBB to FailMBB + MachineBasicBlock::iterator U1It(SeqMI[1]); + MachineBasicBlock::iterator U2It(SeqMI[4]); + ++U2It; + FailMBB->splice(FailMBB->end(), CurMBB, U1It, U2It); + BuildMI(*FailMBB, FailMBB->end(), DebugLoc(), TII->get(X86::INT3)); + + // Move leftover instructions after the stack-up + // from the current basic block into the new return block
+ JMI.addMBB(NewRetMBB); + MachineBasicBlock::iterator SplicePt(JMI.getInstr()); + ++SplicePt; + if (SplicePt != CurMBB->end()) + NewRetMBB->splice(NewRetMBB->end(), CurMBB, SplicePt); + + // Restructure Basic Blocks + CurMBB->addSuccessor(NewRetMBB); + CurMBB->addSuccessor(FailMBB); + + FinishFunction(FailMBB, NewRetMBB); + return !Changed; +} diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 2ea56746aff249..c43bf370d19662 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -703,14 +703,13 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family, } } -static StringRef -getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, - const unsigned *Features, - unsigned *Type, unsigned *Subtype) { - auto testFeature = [&](unsigned F) { - return (Features[F / 32] & (1U << (F % 32))) != 0; - }; +#define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0 +static StringRef getIntelProcessorTypeAndSubtype(unsigned Family, + unsigned Model, + const unsigned *Features, + unsigned *Type, + unsigned *Subtype) { StringRef CPU; switch (Family) { @@ -1067,15 +1066,12 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, return CPU; } -static StringRef -getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, - const unsigned *Features, - unsigned *Type, unsigned *Subtype) { - auto testFeature = [&](unsigned F) { - return (Features[F / 32] & (1U << (F % 32))) != 0; - }; - - StringRef CPU; +static const char *getAMDProcessorTypeAndSubtype(unsigned Family, + unsigned Model, + const unsigned *Features, + unsigned *Type, + unsigned *Subtype) { + const char *CPU = 0; switch (Family) { case 4: @@ -1215,7 +1211,7 @@ getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, *Subtype = X86::AMDFAM19H_ZNVER4; break; // "znver4" } - break; + break; // family 19h default: break; // Unknown AMD CPU. 
} @@ -1223,6 +1219,8 @@ getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, return CPU; } +#undef testFeature + static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, unsigned *Features) { unsigned EAX, EBX; diff --git a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp index 40ee59c014b09f..6f8ce174ea4dae 100644 --- a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp +++ b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp @@ -145,7 +145,7 @@ static void doList(opt::InputArgList &Args) { return; Error Err = Error::success(); - object::Archive Archive(B.get()->getMemBufferRef(), Err); + object::Archive Archive(B->getMemBufferRef(), Err); fatalOpenError(std::move(Err), B->getBufferIdentifier()); std::vector Names; diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp index 230bb8b0a5dce2..e326f30ad88eeb 100644 --- a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/CaptureTracking.h" @@ -130,7 +131,7 @@ class SanitizerBinaryMetadata { std::unique_ptr Ignorelist) : Mod(M), Options(transformOptionsFromCl(std::move(Opts))), Ignorelist(std::move(Ignorelist)), TargetTriple(M.getTargetTriple()), - IRB(M.getContext()) { + VersionStr(utostr(getVersion())), IRB(M.getContext()) { // FIXME: Make it work with other formats. assert(TargetTriple.isOSBinFormatELF() && "ELF only"); assert(!(TargetTriple.isNVPTX() || TargetTriple.isAMDGPU()) && @@ -167,10 +168,10 @@ class SanitizerBinaryMetadata { StringRef getSectionName(StringRef SectionSuffix); // Returns the section start marker name. 
- Twine getSectionStart(StringRef SectionSuffix); + StringRef getSectionStart(StringRef SectionSuffix); // Returns the section end marker name. - Twine getSectionEnd(StringRef SectionSuffix); + StringRef getSectionEnd(StringRef SectionSuffix); // Returns true if the access to the address should be considered "atomic". bool pretendAtomicAccess(const Value *Addr); @@ -179,6 +180,7 @@ class SanitizerBinaryMetadata { const SanitizerBinaryMetadataOptions Options; std::unique_ptr Ignorelist; const Triple TargetTriple; + const std::string VersionStr; IRBuilder<> IRB; BumpPtrAllocator Alloc; UniqueStringSaver StringPool{Alloc}; @@ -209,19 +211,25 @@ bool SanitizerBinaryMetadata::run() { getSectionMarker(getSectionStart(MI->SectionSuffix), PtrTy), getSectionMarker(getSectionEnd(MI->SectionSuffix), PtrTy), }; + + // Calls to the initialization functions with different versions cannot be + // merged. Give the structors unique names based on the version, which will + // also be used as the COMDAT key. + const std::string StructorPrefix = (MI->FunctionPrefix + VersionStr).str(); + // We declare the _add and _del functions as weak, and only call them if // there is a valid symbol linked. This allows building binaries with // semantic metadata, but without having callbacks. When a tool that wants // the metadata is linked which provides the callbacks, they will be called. 
Function *Ctor = createSanitizerCtorAndInitFunctions( - Mod, (MI->FunctionPrefix + ".module_ctor").str(), + Mod, StructorPrefix + ".module_ctor", (MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs, /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks) .first; Function *Dtor = createSanitizerCtorAndInitFunctions( - Mod, (MI->FunctionPrefix + ".module_dtor").str(), + Mod, StructorPrefix + ".module_dtor", (MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs, /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks) .first; @@ -454,15 +462,19 @@ SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) { StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) { // FIXME: Other TargetTriples. // Request ULEB128 encoding for all integer constants. - return StringPool.save(SectionSuffix + "!C"); + return StringPool.save(SectionSuffix + VersionStr + "!C"); } -Twine SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) { - return "__start_" + SectionSuffix; +StringRef SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) { + // Twine only concatenates 2 strings; with >2 strings, concatenating them + // creates Twine temporaries, and returning the final Twine no longer works + // because we'd end up with a stack-use-after-return. So here we also use the + // StringPool to store the new string. 
+ return StringPool.save("__start_" + SectionSuffix + VersionStr); } -Twine SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) { - return "__stop_" + SectionSuffix; +StringRef SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) { + return StringPool.save("__stop_" + SectionSuffix + VersionStr); } } // namespace diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp index f921ee72a0a1ce..ed9c1828ce06a2 100644 --- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp +++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp @@ -328,6 +328,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo( ++I; if (!NotHoisted.count(&*Current)) { Current->moveBefore(ToBlock.getTerminator()); + Current->dropLocation(); } } return true; diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index 4f36bac11e34b7..db0d40b317d179 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -1295,24 +1295,16 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) { return (void)markConstant(&I, C); } - if (I.getDestTy()->isIntegerTy() && I.getSrcTy()->isIntOrIntVectorTy()) { + // Ignore bitcasts, as they may change the number of vector elements. + if (I.getDestTy()->isIntegerTy() && I.getSrcTy()->isIntOrIntVectorTy() && + I.getOpcode() != Instruction::BitCast) { auto &LV = getValueState(&I); ConstantRange OpRange = getConstantRange(OpSt, I.getSrcTy(), /*UndefAllowed=*/false); Type *DestTy = I.getDestTy(); - // Vectors where all elements have the same known constant range are treated - // as a single constant range in the lattice. When bitcasting such vectors, - // there is a mis-match between the width of the lattice value (single - // constant range) and the original operands (vector). Go to overdefined in - // that case. 
- if (I.getOpcode() == Instruction::BitCast && - I.getOperand(0)->getType()->isVectorTy() && - OpRange.getBitWidth() < DL.getTypeSizeInBits(DestTy)) - return (void)markOverdefined(&I); - ConstantRange Res = - OpRange.castOp(I.getOpcode(), DL.getTypeSizeInBits(DestTy)); + OpRange.castOp(I.getOpcode(), DestTy->getScalarSizeInBits()); mergeInValue(LV, &I, ValueLatticeElement::getRange(Res)); } else markOverdefined(&I); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index d306524ae51135..f54eebb2874ab8 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1543,7 +1543,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { return Result; } -bool LoopVectorizationLegality::prepareToFoldTailByMasking() { +bool LoopVectorizationLegality::canFoldTailByMasking() const { LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n"); @@ -1586,23 +1586,31 @@ bool LoopVectorizationLegality::prepareToFoldTailByMasking() { // The list of pointers that we can safely read and write to remains empty. SmallPtrSet SafePointers; - // Collect masked ops in temporary set first to avoid partially populating - // MaskedOp if a block cannot be predicated. + // Check all blocks for predication, including those that ordinarily do not + // need predication such as the header block. SmallPtrSet TmpMaskedOp; - - // Check and mark all blocks for predication, including those that ordinarily - // do not need predication such as the header block. 
for (BasicBlock *BB : TheLoop->blocks()) { if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) { - LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n"); + LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n"); return false; } } LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n"); - MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end()); return true; } +void LoopVectorizationLegality::prepareToFoldTailByMasking() { + // The list of pointers that we can safely read and write to remains empty. + SmallPtrSet SafePointers; + + // Mark all blocks for predication, including those that ordinarily do not + // need predication such as the header block. + for (BasicBlock *BB : TheLoop->blocks()) { + [[maybe_unused]] bool R = blockCanBePredicated(BB, SafePointers, MaskedOp); + assert(R && "Must be able to predicate block when tail-folding."); + } +} + } // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 93f6d1d82e244c..de8dc8600e58b4 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -226,7 +226,7 @@ class VPBuilder { /// TODO: The following VectorizationFactor was pulled out of /// LoopVectorizationCostModel class. LV also deals with -/// VectorizerParams::VectorizationFactor and VectorizationCostTy. +/// VectorizerParams::VectorizationFactor. /// We need to streamline them. /// Information about vectorization costs. 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 1423deb5a73f9a..208246f1a887e8 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1090,7 +1090,7 @@ class LoopVectorizationCostModel { bool selectUserVectorizationFactor(ElementCount UserVF) { collectUniformsAndScalars(UserVF); collectInstsToScalarize(UserVF); - return expectedCost(UserVF).first.isValid(); + return expectedCost(UserVF).isValid(); } /// \return The size (in bits) of the smallest and widest types in the code @@ -1502,7 +1502,7 @@ class LoopVectorizationCostModel { /// \param UserIC User specific interleave count. void setTailFoldingStyles(bool IsScalableVF, unsigned UserIC) { assert(!ChosenTailFoldingStyle && "Tail folding must not be selected yet."); - if (!Legal->prepareToFoldTailByMasking()) { + if (!Legal->canFoldTailByMasking()) { ChosenTailFoldingStyle = std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None); return; @@ -1591,20 +1591,13 @@ class LoopVectorizationCostModel { Scalars.clear(); } - /// The vectorization cost is a combination of the cost itself and a boolean - /// indicating whether any of the contributing operations will actually - /// operate on vector values after type legalization in the backend. If this - /// latter value is false, then all operations will be scalarized (i.e. no - /// vectorization has actually taken place). - using VectorizationCostTy = std::pair; - /// Returns the expected execution cost. The unit of the cost does /// not matter because we use the 'cost' units to compare different /// vector widths. The cost that is returned is *not* normalized by /// the factor width. If \p Invalid is not nullptr, this function /// will add a pair(Instruction*, ElementCount) to \p Invalid for /// each instruction that has an Invalid cost for the given VF. 
- VectorizationCostTy + InstructionCost expectedCost(ElementCount VF, SmallVectorImpl *Invalid = nullptr); @@ -1642,12 +1635,7 @@ class LoopVectorizationCostModel { /// Returns the execution time cost of an instruction for a given vector /// width. Vector width of one means scalar. - VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF); - - /// The cost-computation logic from getInstructionCost which provides - /// the vector type as an output parameter. - InstructionCost getInstructionCost(Instruction *I, ElementCount VF, - Type *&VectorTy); + InstructionCost getInstructionCost(Instruction *I, ElementCount VF); /// Return the cost of instructions in an inloop reduction pattern, if I is /// part of that pattern. @@ -4795,9 +4783,101 @@ static void emitInvalidCostRemarks(SmallVector InvalidCosts, } while (!Tail.empty()); } +/// Check if any recipe of \p Plan will generate a vector value, which will be +/// assigned a vector register. +static bool willGenerateVectors(VPlan &Plan, ElementCount VF, + const TargetTransformInfo &TTI) { + assert(VF.isVector() && "Checking a scalar VF?"); + VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(), + Plan.getCanonicalIV()->getScalarType()->getContext()); + // Set of already visited types. + DenseSet Visited; + for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( + vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) { + for (VPRecipeBase &R : *VPBB) { + // Continue early if the recipe is considered to not produce a vector + // result. Note that this includes VPInstruction where some opcodes may + // produce a vector, to preserve existing behavior as VPInstructions model + // aspects not directly mapped to existing IR instructions. 
+ switch (R.getVPDefID()) { + case VPDef::VPDerivedIVSC: + case VPDef::VPScalarIVStepsSC: + case VPDef::VPScalarCastSC: + case VPDef::VPReplicateSC: + case VPDef::VPInstructionSC: + case VPDef::VPCanonicalIVPHISC: + case VPDef::VPVectorPointerSC: + case VPDef::VPExpandSCEVSC: + case VPDef::VPEVLBasedIVPHISC: + case VPDef::VPPredInstPHISC: + case VPDef::VPBranchOnMaskSC: + continue; + case VPDef::VPReductionSC: + case VPDef::VPActiveLaneMaskPHISC: + case VPDef::VPWidenCallSC: + case VPDef::VPWidenCanonicalIVSC: + case VPDef::VPWidenCastSC: + case VPDef::VPWidenGEPSC: + case VPDef::VPWidenSC: + case VPDef::VPWidenSelectSC: + case VPDef::VPBlendSC: + case VPDef::VPFirstOrderRecurrencePHISC: + case VPDef::VPWidenPHISC: + case VPDef::VPWidenIntOrFpInductionSC: + case VPDef::VPWidenPointerInductionSC: + case VPDef::VPReductionPHISC: + case VPDef::VPInterleaveSC: + case VPDef::VPWidenLoadEVLSC: + case VPDef::VPWidenLoadSC: + case VPDef::VPWidenStoreEVLSC: + case VPDef::VPWidenStoreSC: + break; + default: + llvm_unreachable("unhandled recipe"); + } + + auto WillWiden = [&TTI, VF](Type *ScalarTy) { + Type *VectorTy = ToVectorTy(ScalarTy, VF); + unsigned NumLegalParts = TTI.getNumberOfParts(VectorTy); + if (!NumLegalParts) + return false; + if (VF.isScalable()) { + // is assumed to be profitable over iN because + // scalable registers are a distinct register class from scalar + // ones. If we ever find a target which wants to lower scalable + // vectors back to scalars, we'll need to update this code to + // explicitly ask TTI about the register class uses for each part. + return NumLegalParts <= VF.getKnownMinValue(); + } + // Two or more parts that share a register - are vectorized. + return NumLegalParts < VF.getKnownMinValue(); + }; + + // If no def nor is a store, e.g., branches, continue - no value to check. 
+ if (R.getNumDefinedValues() == 0 && + !isa( + &R)) + continue; + // For multi-def recipes, currently only interleaved loads, suffice to + // check first def only. + // For stores check their stored value; for interleaved stores suffice + // the check first stored value only. In all cases this is the second + // operand. + VPValue *ToCheck = + R.getNumDefinedValues() >= 1 ? R.getVPValue(0) : R.getOperand(1); + Type *ScalarTy = TypeInfo.inferScalarType(ToCheck); + if (!Visited.insert({ScalarTy}).second) + continue; + if (WillWiden(ScalarTy)) + return true; + } + } + + return false; +} + VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() { - InstructionCost ExpectedCost = - CM.expectedCost(ElementCount::getFixed(1)).first; + InstructionCost ExpectedCost = CM.expectedCost(ElementCount::getFixed(1)); LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n"); assert(ExpectedCost.isValid() && "Unexpected invalid cost for scalar loop"); assert(any_of(VPlans, @@ -4826,9 +4906,8 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() { if (VF.isScalar()) continue; - LoopVectorizationCostModel::VectorizationCostTy C = - CM.expectedCost(VF, &InvalidCosts); - VectorizationFactor Candidate(VF, C.first, ScalarCost.ScalarCost); + InstructionCost C = CM.expectedCost(VF, &InvalidCosts); + VectorizationFactor Candidate(VF, C, ScalarCost.ScalarCost); #ifndef NDEBUG unsigned AssumedMinimumVscale = @@ -4845,7 +4924,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() { LLVM_DEBUG(dbgs() << ".\n"); #endif - if (!C.second && !ForceVectorization) { + if (!ForceVectorization && !willGenerateVectors(*P, VF, TTI)) { LLVM_DEBUG( dbgs() << "LV: Not considering vector loop of width " << VF @@ -5146,7 +5225,7 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF, // If we did not calculate the cost for VF (because the user selected the VF) // then we calculate the cost of VF here. 
if (LoopCost == 0) { - LoopCost = expectedCost(VF).first; + LoopCost = expectedCost(VF); assert(LoopCost.isValid() && "Expected to have chosen a VF with valid cost"); // Loop body is free and there is no need for interleaving. @@ -5717,15 +5796,14 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount( // Compute the cost of the vector instruction. Note that this cost already // includes the scalarization overhead of the predicated instruction. - InstructionCost VectorCost = getInstructionCost(I, VF).first; + InstructionCost VectorCost = getInstructionCost(I, VF); // Compute the cost of the scalarized instruction. This cost is the cost of // the instruction as if it wasn't if-converted and instead remained in the // predicated block. We will scale this cost by block probability after // computing the scalarization overhead. InstructionCost ScalarCost = - VF.getFixedValue() * - getInstructionCost(I, ElementCount::getFixed(1)).first; + VF.getFixedValue() * getInstructionCost(I, ElementCount::getFixed(1)); // Compute the scalarization overhead of needed insertelement instructions // and phi nodes. @@ -5769,14 +5847,13 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount( return Discount; } -LoopVectorizationCostModel::VectorizationCostTy -LoopVectorizationCostModel::expectedCost( +InstructionCost LoopVectorizationCostModel::expectedCost( ElementCount VF, SmallVectorImpl *Invalid) { - VectorizationCostTy Cost; + InstructionCost Cost; // For each block. for (BasicBlock *BB : TheLoop->blocks()) { - VectorizationCostTy BlockCost; + InstructionCost BlockCost; // For each instruction in the old loop. for (Instruction &I : BB->instructionsWithoutDebug()) { @@ -5785,22 +5862,19 @@ LoopVectorizationCostModel::expectedCost( (VF.isVector() && VecValuesToIgnore.count(&I))) continue; - VectorizationCostTy C = getInstructionCost(&I, VF); + InstructionCost C = getInstructionCost(&I, VF); // Check if we should override the cost. 
- if (C.first.isValid() && - ForceTargetInstructionCost.getNumOccurrences() > 0) - C.first = InstructionCost(ForceTargetInstructionCost); + if (C.isValid() && ForceTargetInstructionCost.getNumOccurrences() > 0) + C = InstructionCost(ForceTargetInstructionCost); // Keep a list of instructions with invalid costs. - if (Invalid && !C.first.isValid()) + if (Invalid && !C.isValid()) Invalid->emplace_back(&I, VF); - BlockCost.first += C.first; - BlockCost.second |= C.second; - LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first - << " for VF " << VF << " For instruction: " << I - << '\n'); + BlockCost += C; + LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C << " for VF " + << VF << " For instruction: " << I << '\n'); } // If we are vectorizing a predicated block, it will have been @@ -5811,10 +5885,9 @@ LoopVectorizationCostModel::expectedCost( // cost by the probability of executing it. blockNeedsPredication from // Legal is used so as to not include all blocks in tail folded loops. if (VF.isScalar() && Legal->blockNeedsPredication(BB)) - BlockCost.first /= getReciprocalPredBlockProb(); + BlockCost /= getReciprocalPredBlockProb(); - Cost.first += BlockCost.first; - Cost.second |= BlockCost.second; + Cost += BlockCost; } return Cost; @@ -6213,49 +6286,6 @@ LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I, return getWideningCost(I, VF); } -LoopVectorizationCostModel::VectorizationCostTy -LoopVectorizationCostModel::getInstructionCost(Instruction *I, - ElementCount VF) { - // If we know that this instruction will remain uniform, check the cost of - // the scalar version. - if (isUniformAfterVectorization(I, VF)) - VF = ElementCount::getFixed(1); - - if (VF.isVector() && isProfitableToScalarize(I, VF)) - return VectorizationCostTy(InstsToScalarize[VF][I], false); - - // Forced scalars do not have any scalarization overhead. 
- auto ForcedScalar = ForcedScalars.find(VF); - if (VF.isVector() && ForcedScalar != ForcedScalars.end()) { - auto InstSet = ForcedScalar->second; - if (InstSet.count(I)) - return VectorizationCostTy( - (getInstructionCost(I, ElementCount::getFixed(1)).first * - VF.getKnownMinValue()), - false); - } - - Type *VectorTy; - InstructionCost C = getInstructionCost(I, VF, VectorTy); - - bool TypeNotScalarized = false; - if (VF.isVector() && VectorTy->isVectorTy()) { - if (unsigned NumParts = TTI.getNumberOfParts(VectorTy)) { - if (VF.isScalable()) - // is assumed to be profitable over iN because - // scalable registers are a distinct register class from scalar ones. - // If we ever find a target which wants to lower scalable vectors - // back to scalars, we'll need to update this code to explicitly - // ask TTI about the register class uses for each part. - TypeNotScalarized = NumParts <= VF.getKnownMinValue(); - else - TypeNotScalarized = NumParts < VF.getKnownMinValue(); - } else - C = InstructionCost::getInvalid(); - } - return VectorizationCostTy(C, TypeNotScalarized); -} - InstructionCost LoopVectorizationCostModel::getScalarizationOverhead( Instruction *I, ElementCount VF, TTI::TargetCostKind CostKind) const { @@ -6646,8 +6676,25 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) { } InstructionCost -LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, - Type *&VectorTy) { +LoopVectorizationCostModel::getInstructionCost(Instruction *I, + ElementCount VF) { + // If we know that this instruction will remain uniform, check the cost of + // the scalar version. + if (isUniformAfterVectorization(I, VF)) + VF = ElementCount::getFixed(1); + + if (VF.isVector() && isProfitableToScalarize(I, VF)) + return InstsToScalarize[VF][I]; + + // Forced scalars do not have any scalarization overhead. 
+ auto ForcedScalar = ForcedScalars.find(VF); + if (VF.isVector() && ForcedScalar != ForcedScalars.end()) { + auto InstSet = ForcedScalar->second; + if (InstSet.count(I)) + return getInstructionCost(I, ElementCount::getFixed(1)) * + VF.getKnownMinValue(); + } + Type *RetTy = I->getType(); if (canTruncateToMinimalBitwidth(I, VF)) RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]); @@ -6670,6 +6717,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, }; (void) hasSingleCopyAfterVectorization; + Type *VectorTy; if (isScalarAfterVectorization(I, VF)) { // With the exception of GEPs and PHIs, after scalarization there should // only be one copy of the instruction generated in the loop. This is @@ -6685,6 +6733,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, } else VectorTy = ToVectorTy(RetTy, VF); + if (VF.isVector() && VectorTy->isVectorTy() && + !TTI.getNumberOfParts(VectorTy)) + return InstructionCost::getInvalid(); + // TODO: We need to estimate the cost of intrinsic calls. switch (I->getOpcode()) { case Instruction::GetElementPtr: @@ -7174,6 +7226,9 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { CM.invalidateCostModelingDecisions(); } + if (CM.foldTailByMasking()) + Legal->prepareToFoldTailByMasking(); + ElementCount MaxUserVF = UserVF.isScalable() ? 
MaxFactors.ScalableVF : MaxFactors.FixedVF; bool UserVFIsLegal = ElementCount::isKnownLE(UserVF, MaxUserVF); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 64c5fb49ec85ce..5bcab7e929231c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -529,11 +529,17 @@ isFixedVectorShuffle(ArrayRef VL, SmallVectorImpl &Mask) { const auto *It = find_if(VL, IsaPred); if (It == VL.end()) return std::nullopt; - auto *EI0 = cast(*It); - if (isa(EI0->getVectorOperandType())) - return std::nullopt; unsigned Size = - cast(EI0->getVectorOperandType())->getNumElements(); + std::accumulate(VL.begin(), VL.end(), 0u, [](unsigned S, Value *V) { + auto *EI = dyn_cast(V); + if (!EI) + return S; + auto *VTy = dyn_cast(EI->getVectorOperandType()); + if (!VTy) + return S; + return std::max(S, VTy->getNumElements()); + }); + Value *Vec1 = nullptr; Value *Vec2 = nullptr; bool HasNonUndefVec = any_of(VL, [](Value *V) { @@ -563,8 +569,6 @@ isFixedVectorShuffle(ArrayRef VL, SmallVectorImpl &Mask) { if (isa(Vec)) { Mask[I] = I; } else { - if (cast(Vec->getType())->getNumElements() != Size) - return std::nullopt; if (isa(EI->getIndexOperand())) continue; auto *Idx = dyn_cast(EI->getIndexOperand()); @@ -1182,6 +1186,12 @@ class BoUpSLP { return VectorizableTree.front()->Scalars; } + /// Checks if the root graph node can be emitted with narrower bitwidth at + /// codegen and returns it signedness, if so. + bool isSignedMinBitwidthRootNode() const { + return MinBWs.at(VectorizableTree.front().get()).second; + } + /// Builds external uses of the vectorized scalars, i.e. the list of /// vectorized scalars to be extracted, their lanes and their scalar users. 
\p /// ExternallyUsedValues contains additional list of external uses to handle @@ -2203,6 +2213,27 @@ class BoUpSLP { return getNumLanes() == 2 || Cnt > 1; } + /// Checks if there is at least single compatible operand in lanes other + /// than \p Lane, compatible with the operand \p Op. + bool canBeVectorized(Instruction *Op, unsigned OpIdx, unsigned Lane) const { + bool OpAPO = getData(OpIdx, Lane).APO; + for (unsigned Ln = 0, Lns = getNumLanes(); Ln != Lns; ++Ln) { + if (Ln == Lane) + continue; + if (any_of(seq(getNumOperands()), [&](unsigned OpI) { + const OperandData &Data = getData(OpI, Ln); + if (Data.APO != OpAPO || Data.IsUsed) + return true; + Value *OpILn = getValue(OpI, Ln); + return (L && L->isLoopInvariant(OpILn)) || + (getSameOpcode({Op, OpILn}, TLI).getOpcode() && + Op->getParent() == cast(OpILn)->getParent()); + })) + return true; + } + return false; + } + public: /// Initialize with all the operands of the instruction vector \p RootVL. VLOperands(ArrayRef RootVL, const BoUpSLP &R) @@ -2258,14 +2289,14 @@ class BoUpSLP { // side. if (isa(OpLane0)) ReorderingModes[OpIdx] = ReorderingMode::Load; - else if (isa(OpLane0)) { + else if (auto *OpILane0 = dyn_cast(OpLane0)) { // Check if OpLane0 should be broadcast. - if (shouldBroadcast(OpLane0, OpIdx, FirstLane)) + if (shouldBroadcast(OpLane0, OpIdx, FirstLane) || + !canBeVectorized(OpILane0, OpIdx, FirstLane)) ReorderingModes[OpIdx] = ReorderingMode::Splat; else ReorderingModes[OpIdx] = ReorderingMode::Opcode; - } - else if (isa(OpLane0)) + } else if (isa(OpLane0)) ReorderingModes[OpIdx] = ReorderingMode::Constant; else if (isa(OpLane0)) // Our best hope is a Splat. It may save some cost in some cases. @@ -2449,6 +2480,90 @@ class BoUpSLP { DeletedInstructions.insert(I); } + /// Remove instructions from the parent function and clear the operands of \p + /// DeadVals instructions, marking for deletion trivially dead operands. 
+ template + void removeInstructionsAndOperands(ArrayRef DeadVals) { + SmallVector DeadInsts; + for (T *V : DeadVals) { + auto *I = cast(V); + DeletedInstructions.insert(I); + } + for (T *V : DeadVals) { + if (!V) + continue; + auto *I = cast(V); + salvageDebugInfo(*I); + SmallVector Entries; + if (const TreeEntry *Entry = getTreeEntry(I)) { + Entries.push_back(Entry); + auto It = MultiNodeScalars.find(I); + if (It != MultiNodeScalars.end()) + Entries.append(It->second.begin(), It->second.end()); + } + for (Use &U : I->operands()) { + if (auto *OpI = dyn_cast_if_present(U.get()); + OpI && !DeletedInstructions.contains(OpI) && OpI->hasOneUser() && + wouldInstructionBeTriviallyDead(OpI, TLI) && + (Entries.empty() || none_of(Entries, [&](const TreeEntry *Entry) { + return Entry->VectorizedValue == OpI; + }))) + DeadInsts.push_back(OpI); + } + I->dropAllReferences(); + } + for (T *V : DeadVals) { + auto *I = cast(V); + if (!I->getParent()) + continue; + assert((I->use_empty() || all_of(I->uses(), + [&](Use &U) { + return isDeleted( + cast(U.getUser())); + })) && + "trying to erase instruction with users."); + I->removeFromParent(); + SE->forgetValue(I); + } + // Process the dead instruction list until empty. + while (!DeadInsts.empty()) { + Value *V = DeadInsts.pop_back_val(); + Instruction *VI = cast_or_null(V); + if (!VI || !VI->getParent()) + continue; + assert(isInstructionTriviallyDead(VI, TLI) && + "Live instruction found in dead worklist!"); + assert(VI->use_empty() && "Instructions with uses are not dead."); + + // Don't lose the debug info while deleting the instructions. + salvageDebugInfo(*VI); + + // Null out all of the instruction's operands to see if any operand + // becomes dead as we go. 
+ for (Use &OpU : VI->operands()) { + Value *OpV = OpU.get(); + if (!OpV) + continue; + OpU.set(nullptr); + + if (!OpV->use_empty()) + continue; + + // If the operand is an instruction that became dead as we nulled out + // the operand, and if it is 'trivially' dead, delete it in a future + // loop iteration. + if (auto *OpI = dyn_cast(OpV)) + if (!DeletedInstructions.contains(OpI) && + isInstructionTriviallyDead(OpI, TLI)) + DeadInsts.push_back(OpI); + } + + VI->removeFromParent(); + DeletedInstructions.insert(VI); + SE->forgetValue(VI); + } + } + /// Checks if the instruction was already analyzed for being possible /// reduction root. bool isAnalyzedReductionRoot(Instruction *I) const { @@ -3983,6 +4098,10 @@ template <> struct DOTGraphTraits : public DefaultDOTGraphTraits { BoUpSLP::~BoUpSLP() { SmallVector DeadInsts; for (auto *I : DeletedInstructions) { + if (!I->getParent()) { + I->insertBefore(F->getEntryBlock().getTerminator()); + continue; + } for (Use &U : I->operands()) { auto *Op = dyn_cast(U.get()); if (Op && !DeletedInstructions.count(Op) && Op->hasOneUser() && @@ -6725,9 +6844,9 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, bool IsScatterVectorizeUserTE = UserTreeIdx.UserTE && UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize; - bool AreAllSameInsts = - (S.getOpcode() && allSameBlock(VL)) || - (S.OpValue->getType()->isPointerTy() && IsScatterVectorizeUserTE && + bool AreAllSameBlock = S.getOpcode() && allSameBlock(VL); + bool AreScatterAllGEPSameBlock = + (IsScatterVectorizeUserTE && S.OpValue->getType()->isPointerTy() && VL.size() > 2 && all_of(VL, [&BB](Value *V) { @@ -6741,6 +6860,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, BB && sortPtrAccesses(VL, UserTreeIdx.UserTE->getMainOp()->getType(), *DL, *SE, SortedIndices)); + bool AreAllSameInsts = AreAllSameBlock || AreScatterAllGEPSameBlock; if (!AreAllSameInsts || allConstant(VL) || isSplat(VL) || (isa( S.OpValue) && @@ -6820,9 +6940,7 @@ void 
BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // Special processing for sorted pointers for ScatterVectorize node with // constant indeces only. - if (AreAllSameInsts && UserTreeIdx.UserTE && - UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize && - !(S.getOpcode() && allSameBlock(VL))) { + if (!AreAllSameBlock && AreScatterAllGEPSameBlock) { assert(S.OpValue->getType()->isPointerTy() && count_if(VL, IsaPred) >= 2 && "Expected pointers only."); @@ -8001,7 +8119,7 @@ getGEPCosts(const TargetTransformInfo &TTI, ArrayRef Ptrs, void BoUpSLP::transformNodes() { constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; for (std::unique_ptr &TE : VectorizableTree) { - TreeEntry &E = *TE.get(); + TreeEntry &E = *TE; switch (E.getOpcode()) { case Instruction::Load: { // No need to reorder masked gather loads, just reorder the scalar @@ -10462,7 +10580,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { } // Add reduced value cost, if resized. if (!VectorizedVals.empty()) { - const TreeEntry &Root = *VectorizableTree.front().get(); + const TreeEntry &Root = *VectorizableTree.front(); auto BWIt = MinBWs.find(&Root); if (BWIt != MinBWs.end()) { Type *DstTy = Root.Scalars.front()->getType(); @@ -10570,7 +10688,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { // Add the cost for reduced value resize (if required). if (ReductionBitWidth != 0) { assert(UserIgnoreList && "Expected reduction tree."); - const TreeEntry &E = *VectorizableTree.front().get(); + const TreeEntry &E = *VectorizableTree.front(); auto It = MinBWs.find(&E); if (It != MinBWs.end() && It->second.first != ReductionBitWidth) { unsigned SrcSize = It->second.first; @@ -10658,36 +10776,20 @@ BoUpSLP::tryToGatherSingleRegisterExtractElements( VectorOpToIdx[EI->getVectorOperand()].push_back(I); } // Sort the vector operands by the maximum number of uses in extractelements. 
- MapVector> VFToVector; - for (const auto &Data : VectorOpToIdx) - VFToVector[cast(Data.first->getType())->getNumElements()] - .push_back(Data.first); - for (auto &Data : VFToVector) { - stable_sort(Data.second, [&VectorOpToIdx](Value *V1, Value *V2) { - return VectorOpToIdx.find(V1)->second.size() > - VectorOpToIdx.find(V2)->second.size(); - }); - } - // Find the best pair of the vectors with the same number of elements or a - // single vector. + SmallVector>> Vectors = + VectorOpToIdx.takeVector(); + stable_sort(Vectors, [](const auto &P1, const auto &P2) { + return P1.second.size() > P2.second.size(); + }); + // Find the best pair of the vectors or a single vector. const int UndefSz = UndefVectorExtracts.size(); unsigned SingleMax = 0; - Value *SingleVec = nullptr; unsigned PairMax = 0; - std::pair PairVec(nullptr, nullptr); - for (auto &Data : VFToVector) { - Value *V1 = Data.second.front(); - if (SingleMax < VectorOpToIdx[V1].size() + UndefSz) { - SingleMax = VectorOpToIdx[V1].size() + UndefSz; - SingleVec = V1; - } - Value *V2 = nullptr; - if (Data.second.size() > 1) - V2 = *std::next(Data.second.begin()); - if (V2 && PairMax < VectorOpToIdx[V1].size() + VectorOpToIdx[V2].size() + - UndefSz) { - PairMax = VectorOpToIdx[V1].size() + VectorOpToIdx[V2].size() + UndefSz; - PairVec = std::make_pair(V1, V2); + if (!Vectors.empty()) { + SingleMax = Vectors.front().second.size() + UndefSz; + if (Vectors.size() > 1) { + auto *ItNext = std::next(Vectors.begin()); + PairMax = SingleMax + ItNext->second.size(); } } if (SingleMax == 0 && PairMax == 0 && UndefSz == 0) @@ -10698,11 +10800,11 @@ BoUpSLP::tryToGatherSingleRegisterExtractElements( SmallVector GatheredExtracts( VL.size(), PoisonValue::get(VL.front()->getType())); if (SingleMax >= PairMax && SingleMax) { - for (int Idx : VectorOpToIdx[SingleVec]) + for (int Idx : Vectors.front().second) std::swap(GatheredExtracts[Idx], VL[Idx]); - } else { - for (Value *V : {PairVec.first, PairVec.second}) - for (int Idx : 
VectorOpToIdx[V]) + } else if (!Vectors.empty()) { + for (unsigned Idx : {0, 1}) + for (int Idx : Vectors[Idx].second) std::swap(GatheredExtracts[Idx], VL[Idx]); } // Add extracts from undefs too. @@ -11771,25 +11873,29 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { MutableArrayRef SubMask = Mask.slice(Part * SliceSize, Limit); constexpr int MaxBases = 2; SmallVector Bases(MaxBases); -#ifndef NDEBUG - int PrevSize = 0; -#endif // NDEBUG - for (const auto [I, V]: enumerate(VL)) { - if (SubMask[I] == PoisonMaskElem) + auto VLMask = zip(VL, SubMask); + const unsigned VF = std::accumulate( + VLMask.begin(), VLMask.end(), 0U, [&](unsigned S, const auto &D) { + if (std::get<1>(D) == PoisonMaskElem) + return S; + Value *VecOp = + cast(std::get<0>(D))->getVectorOperand(); + if (const TreeEntry *TE = R.getTreeEntry(VecOp)) + VecOp = TE->VectorizedValue; + assert(VecOp && "Expected vectorized value."); + const unsigned Size = + cast(VecOp->getType())->getNumElements(); + return std::max(S, Size); + }); + for (const auto [V, I] : VLMask) { + if (I == PoisonMaskElem) continue; Value *VecOp = cast(V)->getVectorOperand(); if (const TreeEntry *TE = R.getTreeEntry(VecOp)) VecOp = TE->VectorizedValue; assert(VecOp && "Expected vectorized value."); - const int Size = - cast(VecOp->getType())->getNumElements(); -#ifndef NDEBUG - assert((PrevSize == Size || PrevSize == 0) && - "Expected vectors of the same size."); - PrevSize = Size; -#endif // NDEBUG VecOp = castToScalarTyElem(VecOp); - Bases[SubMask[I] < Size ? 
0 : 1] = VecOp; + Bases[I / VF] = VecOp; } if (!Bases.front()) continue; @@ -11815,16 +11921,17 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { "Expected first part or all previous parts masked."); copy(SubMask, std::next(VecMask.begin(), Part * SliceSize)); } else { - unsigned VF = cast(Vec->getType())->getNumElements(); + unsigned NewVF = + cast(Vec->getType())->getNumElements(); if (Vec->getType() != SubVec->getType()) { unsigned SubVecVF = cast(SubVec->getType())->getNumElements(); - VF = std::max(VF, SubVecVF); + NewVF = std::max(NewVF, SubVecVF); } // Adjust SubMask. for (int &Idx : SubMask) if (Idx != PoisonMaskElem) - Idx += VF; + Idx += NewVF; copy(SubMask, std::next(VecMask.begin(), Part * SliceSize)); Vec = createShuffle(Vec, SubVec, VecMask); TransformToIdentity(VecMask); @@ -14083,11 +14190,8 @@ Value *BoUpSLP::vectorizeTree( } #endif LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n"); - eraseInstruction(cast(Scalar)); - // Retain to-be-deleted instructions for some debug-info - // bookkeeping. NOTE: eraseInstruction only marks the instruction for - // deletion - instructions are not deleted until later. - RemovedInsts.push_back(cast(Scalar)); + auto *I = cast(Scalar); + RemovedInsts.push_back(I); } } @@ -14096,10 +14200,26 @@ Value *BoUpSLP::vectorizeTree( if (auto *V = dyn_cast(VectorizableTree[0]->VectorizedValue)) V->mergeDIAssignID(RemovedInsts); + // Clear up reduction references, if any. + if (UserIgnoreList) { + for (Instruction *I : RemovedInsts) { + if (getTreeEntry(I)->Idx != 0) + continue; + I->replaceUsesWithIf(PoisonValue::get(I->getType()), [&](Use &U) { + return UserIgnoreList->contains(U.getUser()); + }); + } + } + // Retain to-be-deleted instructions for some debug-info bookkeeping and alias + // cache correctness. + // NOTE: removeInstructionAndOperands only marks the instruction for deletion + // - instructions are not deleted until later. 
+ removeInstructionsAndOperands(ArrayRef(RemovedInsts)); + Builder.ClearInsertionPoint(); InstrElementSize.clear(); - const TreeEntry &RootTE = *VectorizableTree.front().get(); + const TreeEntry &RootTE = *VectorizableTree.front(); Value *Vec = RootTE.VectorizedValue; if (auto It = MinBWs.find(&RootTE); ReductionBitWidth != 0 && It != MinBWs.end() && @@ -15486,8 +15606,8 @@ void BoUpSLP::computeMinimumValueSizes() { VectorizableTree.front()->Scalars.front()->getType())) Limit = 3; unsigned MaxBitWidth = ComputeMaxBitWidth( - *VectorizableTree[NodeIdx].get(), IsTopRoot, IsProfitableToDemoteRoot, - Opcode, Limit, IsTruncRoot, IsSignedCmp); + *VectorizableTree[NodeIdx], IsTopRoot, IsProfitableToDemoteRoot, Opcode, + Limit, IsTruncRoot, IsSignedCmp); if (ReductionBitWidth != 0 && (IsTopRoot || !RootDemotes.empty())) { if (MaxBitWidth != 0 && ReductionBitWidth < MaxBitWidth) ReductionBitWidth = bit_ceil(MaxBitWidth); @@ -16145,15 +16265,18 @@ bool SLPVectorizerPass::vectorizeStores( Res.first = Idx; Res.second.emplace(Idx, 0); }; - StoreInst *PrevStore = Stores.front(); + Type *PrevValTy = nullptr; for (auto [I, SI] : enumerate(Stores)) { + if (R.isDeleted(SI)) + continue; + if (!PrevValTy) + PrevValTy = SI->getValueOperand()->getType(); // Check that we do not try to vectorize stores of different types. - if (PrevStore->getValueOperand()->getType() != - SI->getValueOperand()->getType()) { + if (PrevValTy != SI->getValueOperand()->getType()) { for (auto &Set : SortedStores) TryToVectorize(Set.second); SortedStores.clear(); - PrevStore = SI; + PrevValTy = SI->getValueOperand()->getType(); } FillStoresSet(I, SI); } @@ -17036,9 +17159,12 @@ class HorizontalReduction { Value *VectorizedTree = nullptr; bool CheckForReusedReductionOps = false; // Try to vectorize elements based on their type. 
+ SmallVector States; + for (ArrayRef RV : ReducedVals) + States.push_back(getSameOpcode(RV, TLI)); for (unsigned I = 0, E = ReducedVals.size(); I < E; ++I) { ArrayRef OrigReducedVals = ReducedVals[I]; - InstructionsState S = getSameOpcode(OrigReducedVals, TLI); + InstructionsState S = States[I]; SmallVector Candidates; Candidates.reserve(2 * OrigReducedVals.size()); DenseMap TrackedToOrig(2 * OrigReducedVals.size()); @@ -17363,14 +17489,11 @@ class HorizontalReduction { Value *ReducedSubTree = emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI); if (ReducedSubTree->getType() != VL.front()->getType()) { - ReducedSubTree = Builder.CreateIntCast( - ReducedSubTree, VL.front()->getType(), any_of(VL, [&](Value *R) { - KnownBits Known = computeKnownBits( - R, cast(ReductionOps.front().front()) - ->getModule() - ->getDataLayout()); - return !Known.isNonNegative(); - })); + assert(ReducedSubTree->getType() != VL.front()->getType() && + "Expected different reduction type."); + ReducedSubTree = + Builder.CreateIntCast(ReducedSubTree, VL.front()->getType(), + V.isSignedMinBitwidthRootNode()); } // Improved analysis for add/fadd/xor reductions with same scale factor @@ -17532,11 +17655,11 @@ class HorizontalReduction { } #endif if (!Ignore->use_empty()) { - Value *Undef = UndefValue::get(Ignore->getType()); - Ignore->replaceAllUsesWith(Undef); + Value *P = PoisonValue::get(Ignore->getType()); + Ignore->replaceAllUsesWith(P); } - V.eraseInstruction(cast(Ignore)); } + V.removeInstructionsAndOperands(RdxOps); } } else if (!CheckForReusedReductionOps) { for (ReductionOpsType &RdxOps : ReductionOps) @@ -18084,6 +18207,8 @@ bool SLPVectorizerPass::vectorizeHorReduction( Stack.emplace(I, Level); continue; } + if (R.isDeleted(Inst)) + continue; } else { // We could not vectorize `Inst` so try to use it as a future seed. if (!TryAppendToPostponedInsts(Inst)) { @@ -18169,15 +18294,28 @@ static bool tryToVectorizeSequence( // Try to vectorize elements base on their type. 
SmallVector Candidates; - for (auto *IncIt = Incoming.begin(), *E = Incoming.end(); IncIt != E;) { + SmallVector VL; + for (auto *IncIt = Incoming.begin(), *E = Incoming.end(); IncIt != E; + VL.clear()) { // Look for the next elements with the same type, parent and operand // kinds. + auto *I = dyn_cast(*IncIt); + if (!I || R.isDeleted(I)) { + ++IncIt; + continue; + } auto *SameTypeIt = IncIt; - while (SameTypeIt != E && AreCompatible(*SameTypeIt, *IncIt)) + while (SameTypeIt != E && (!isa(*SameTypeIt) || + R.isDeleted(cast(*SameTypeIt)) || + AreCompatible(*SameTypeIt, *IncIt))) { + auto *I = dyn_cast(*SameTypeIt); ++SameTypeIt; + if (I && !R.isDeleted(I)) + VL.push_back(cast(I)); + } // Try to vectorize them. - unsigned NumElts = (SameTypeIt - IncIt); + unsigned NumElts = VL.size(); LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize starting at nodes (" << NumElts << ")\n"); // The vectorization is a 3-state attempt: @@ -18189,10 +18327,15 @@ static bool tryToVectorizeSequence( // 3. Final attempt to try to vectorize all instructions with the // same/alternate ops only, this may result in some extra final // vectorization. - if (NumElts > 1 && - TryToVectorizeHelper(ArrayRef(IncIt, NumElts), MaxVFOnly)) { + if (NumElts > 1 && TryToVectorizeHelper(ArrayRef(VL), MaxVFOnly)) { // Success start over because instructions might have been changed. Changed = true; + VL.swap(Candidates); + Candidates.clear(); + for (T *V : VL) { + if (auto *I = dyn_cast(V); I && !R.isDeleted(I)) + Candidates.push_back(V); + } } else { /// \Returns the minimum number of elements that we will attempt to /// vectorize. 
@@ -18203,7 +18346,10 @@ static bool tryToVectorizeSequence( if (NumElts < GetMinNumElements(*IncIt) && (Candidates.empty() || Candidates.front()->getType() == (*IncIt)->getType())) { - Candidates.append(IncIt, std::next(IncIt, NumElts)); + for (T *V : VL) { + if (auto *I = dyn_cast(V); I && !R.isDeleted(I)) + Candidates.push_back(V); + } } } // Final attempt to vectorize instructions with the same types. @@ -18214,13 +18360,26 @@ static bool tryToVectorizeSequence( Changed = true; } else if (MaxVFOnly) { // Try to vectorize using small vectors. - for (auto *It = Candidates.begin(), *End = Candidates.end(); - It != End;) { + SmallVector VL; + for (auto *It = Candidates.begin(), *End = Candidates.end(); It != End; + VL.clear()) { + auto *I = dyn_cast(*It); + if (!I || R.isDeleted(I)) { + ++It; + continue; + } auto *SameTypeIt = It; - while (SameTypeIt != End && AreCompatible(*SameTypeIt, *It)) + while (SameTypeIt != End && + (!isa(*SameTypeIt) || + R.isDeleted(cast(*SameTypeIt)) || + AreCompatible(*SameTypeIt, *It))) { + auto *I = dyn_cast(*SameTypeIt); ++SameTypeIt; - unsigned NumElts = (SameTypeIt - It); - if (NumElts > 1 && TryToVectorizeHelper(ArrayRef(It, NumElts), + if (I && !R.isDeleted(I)) + VL.push_back(cast(I)); + } + unsigned NumElts = VL.size(); + if (NumElts > 1 && TryToVectorizeHelper(ArrayRef(VL), /*MaxVFOnly=*/false)) Changed = true; It = SameTypeIt; @@ -18494,7 +18653,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { } return false; }; - auto AreCompatiblePHIs = [&PHIToOpcodes, this](Value *V1, Value *V2) { + auto AreCompatiblePHIs = [&PHIToOpcodes, this, &R](Value *V1, Value *V2) { if (V1 == V2) return true; if (V1->getType() != V2->getType()) @@ -18509,6 +18668,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { continue; if (auto *I1 = dyn_cast(Opcodes1[I])) if (auto *I2 = dyn_cast(Opcodes2[I])) { + if (R.isDeleted(I1) || R.isDeleted(I2)) + return false; if (I1->getParent() != 
I2->getParent()) return false; InstructionsState S = getSameOpcode({I1, I2}, *TLI); @@ -18729,8 +18890,13 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { // are trying to vectorize the index computations, so the maximum number of // elements is based on the size of the index expression, rather than the // size of the GEP itself (the target's pointer size). + auto *It = find_if(Entry.second, [&](GetElementPtrInst *GEP) { + return !R.isDeleted(GEP); + }); + if (It == Entry.second.end()) + continue; unsigned MaxVecRegSize = R.getMaxVecRegSize(); - unsigned EltSize = R.getVectorElementSize(*Entry.second[0]->idx_begin()); + unsigned EltSize = R.getVectorElementSize(*(*It)->idx_begin()); if (MaxVecRegSize < EltSize) continue; diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index eca5d1d4c5e1de..c4b096d6531584 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -54,6 +54,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { return ResTy; } case Instruction::ICmp: + case VPInstruction::ActiveLaneMask: + return inferScalarType(R->getOperand(1)); case VPInstruction::FirstOrderRecurrenceSplice: case VPInstruction::Not: return SetResultTyFromOp(); @@ -68,6 +70,9 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { case VPInstruction::PtrAdd: // Return the type based on the pointer argument (i.e. first operand). return inferScalarType(R->getOperand(0)); + case VPInstruction::BranchOnCond: + case VPInstruction::BranchOnCount: + return Type::getVoidTy(Ctx); default: break; } @@ -237,19 +242,21 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { Type *ResultTy = TypeSwitch(V->getDefiningRecipe()) - .Case([this](const auto *R) { - // Handle header phi recipes, except VPWidenIntOrFpInduction - // which needs special handling due it being possibly truncated. 
- // TODO: consider inferring/caching type of siblings, e.g., - // backedge value, here and in cases below. - return inferScalarType(R->getStartValue()); - }) + .Case( + [this](const auto *R) { + // Handle header phi recipes, except VPWidenIntOrFpInduction + // which needs special handling due it being possibly truncated. + // TODO: consider inferring/caching type of siblings, e.g., + // backedge value, here and in cases below. + return inferScalarType(R->getStartValue()); + }) .Case( [](const auto *R) { return R->getScalarType(); }) - .Case([this](const VPRecipeBase *R) { + .Case([this](const VPRecipeBase *R) { return inferScalarType(R->getOperand(0)); }) .Case OldMask; - if (!match(&I, m_Shuffle(m_OneUse(m_Value(V0)), m_OneUse(m_Value(V1)), - m_Mask(OldMask)))) + if (!match(&I, m_Shuffle(m_Value(V0), m_Value(V1), m_Mask(OldMask)))) return false; auto *C0 = dyn_cast(V0); @@ -1551,11 +1550,13 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { // Try to replace a castop with a shuffle if the shuffle is not costly. 
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; - InstructionCost OldCost = + InstructionCost CostC0 = TTI.getCastInstrCost(C0->getOpcode(), CastDstTy, CastSrcTy, - TTI::CastContextHint::None, CostKind) + + TTI::CastContextHint::None, CostKind); + InstructionCost CostC1 = TTI.getCastInstrCost(C1->getOpcode(), CastDstTy, CastSrcTy, TTI::CastContextHint::None, CostKind); + InstructionCost OldCost = CostC0 + CostC1; OldCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, CastDstTy, OldMask, CostKind, 0, nullptr, std::nullopt, &I); @@ -1564,6 +1565,10 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { TargetTransformInfo::SK_PermuteTwoSrc, CastSrcTy, NewMask, CostKind); NewCost += TTI.getCastInstrCost(Opcode, ShuffleDstTy, NewShuffleDstTy, TTI::CastContextHint::None, CostKind); + if (!C0->hasOneUse()) + NewCost += CostC0; + if (!C1->hasOneUse()) + NewCost += CostC1; LLVM_DEBUG(dbgs() << "Found a shuffle feeding two casts: " << I << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll index c97a00ccdd4557..2b9ef7acd4a4d7 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll @@ -18,8 +18,8 @@ define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) { ; ; GISEL-LABEL: combine_vec_udiv_uniform: ; GISEL: // %bb.0: -; GISEL-NEXT: adrp x8, .LCPI0_0 -; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] +; GISEL-NEXT: mov w8, #25645 // =0x642d +; GISEL-NEXT: dup v1.8h, w8 ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir index 4cd6eef531ce08..66c8c2efda9bc1 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir +++ 
b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir @@ -16,10 +16,11 @@ body: | ; CHECK-LABEL: name: v4s32_gpr ; CHECK: liveins: $w0 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32) - ; CHECK: $q0 = COPY [[DUP]](<4 x s32>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 + ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32) + ; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s32) = COPY $w0 %4:_(<4 x s32>) = G_DUP %0(s32) $q0 = COPY %4(<4 x s32>) @@ -37,10 +38,11 @@ body: | ; CHECK-LABEL: name: v4s64_gpr ; CHECK: liveins: $x0 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) - ; CHECK: $q0 = COPY [[DUP]](<2 x s64>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0 + ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) + ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s64) = COPY $x0 %4:_(<2 x s64>) = G_DUP %0(s64) $q0 = COPY %4(<2 x s64>) @@ -58,10 +60,11 @@ body: | ; CHECK-LABEL: name: v2s32_gpr ; CHECK: liveins: $w0 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32) - ; CHECK: $d0 = COPY [[DUP]](<2 x s32>) - ; CHECK: RET_ReallyLR implicit $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 + ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32) + ; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:_(s32) = COPY $w0 %4:_(<2 x s32>) = G_DUP %0(s32) $d0 = COPY %4(<2 x s32>) @@ -79,10 +82,11 @@ body: | ; CHECK-LABEL: name: v4s32_fpr ; CHECK: liveins: $s0 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32) - ; 
CHECK: $q0 = COPY [[DUP]](<4 x s32>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 + ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32) + ; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s32) = COPY $s0 %4:_(<4 x s32>) = G_DUP %0(s32) $q0 = COPY %4(<4 x s32>) @@ -100,10 +104,11 @@ body: | ; CHECK-LABEL: name: v2s64_fpr ; CHECK: liveins: $d0 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) - ; CHECK: $q0 = COPY [[DUP]](<2 x s64>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0 + ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) + ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s64) = COPY $d0 %4:_(<2 x s64>) = G_DUP %0(s64) $q0 = COPY %4(<2 x s64>) @@ -121,10 +126,11 @@ body: | ; CHECK-LABEL: name: v2s32_fpr ; CHECK: liveins: $s0 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32) - ; CHECK: $d0 = COPY [[DUP]](<2 x s32>) - ; CHECK: RET_ReallyLR implicit $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 + ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32) + ; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:_(s32) = COPY $s0 %4:_(<2 x s32>) = G_DUP %0(s32) $d0 = COPY %4(<2 x s32>) @@ -142,10 +148,11 @@ body: | ; CHECK-LABEL: name: v2s64_fpr_copy ; CHECK: liveins: $d0 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) - ; CHECK: $q0 = COPY [[DUP]](<2 x s64>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0 + ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) + ; CHECK-NEXT: $q0 = 
COPY [[DUP]](<2 x s64>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s64) = COPY $d0 %6:_(<2 x s64>) = G_DUP %0(s64) $q0 = COPY %6(<2 x s64>) @@ -163,11 +170,13 @@ body: | ; CHECK-LABEL: name: v416s8_gpr ; CHECK: liveins: $w0 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 - ; CHECK: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32) - ; CHECK: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP %trunc(s8) - ; CHECK: $q0 = COPY [[DUP]](<16 x s8>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 + ; CHECK-NEXT: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:gpr(s32) = G_ANYEXT %trunc(s8) + ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP [[ANYEXT]](s32) + ; CHECK-NEXT: $q0 = COPY [[DUP]](<16 x s8>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s32) = COPY $w0 %trunc:_(s8) = G_TRUNC %0(s32) %1:_(<16 x s8>) = G_DUP %trunc(s8) diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll index 307aa397eabbbe..5aff8e03514879 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll @@ -994,9 +994,9 @@ define <8 x i16> @smull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind { ; ; CHECK-GI-LABEL: smull_noextvec_v8i8_v8i16: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI34_0 +; CHECK-GI-NEXT: mov w8, #-999 // =0xfffffc19 ; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI34_0] +; CHECK-GI-NEXT: dup v1.8h, w8 ; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h ; CHECK-GI-NEXT: ret %tmp3 = sext <8 x i8> %arg to <8 x i16> @@ -1088,29 +1088,13 @@ define <8 x i16> @umull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind { define <8 x i16> @umull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind { ; Do not use SMULL if the BUILD_VECTOR element values are too big. 
-; CHECK-NEON-LABEL: umull_noextvec_v8i8_v8i16: -; CHECK-NEON: // %bb.0: -; CHECK-NEON-NEXT: mov w8, #999 // =0x3e7 -; CHECK-NEON-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEON-NEXT: dup v1.8h, w8 -; CHECK-NEON-NEXT: mul v0.8h, v0.8h, v1.8h -; CHECK-NEON-NEXT: ret -; -; CHECK-SVE-LABEL: umull_noextvec_v8i8_v8i16: -; CHECK-SVE: // %bb.0: -; CHECK-SVE-NEXT: mov w8, #999 // =0x3e7 -; CHECK-SVE-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-SVE-NEXT: dup v1.8h, w8 -; CHECK-SVE-NEXT: mul v0.8h, v0.8h, v1.8h -; CHECK-SVE-NEXT: ret -; -; CHECK-GI-LABEL: umull_noextvec_v8i8_v8i16: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI38_0 -; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI38_0] -; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h -; CHECK-GI-NEXT: ret +; CHECK-LABEL: umull_noextvec_v8i8_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #999 // =0x3e7 +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: dup v1.8h, w8 +; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret %tmp3 = zext <8 x i8> %arg to <8 x i16> %tmp4 = mul <8 x i16> %tmp3, ret <8 x i16> %tmp4 diff --git a/llvm/test/CodeGen/AArch64/arm64-hlt.ll b/llvm/test/CodeGen/AArch64/arm64-hlt.ll new file mode 100644 index 00000000000000..b16d9a0426f0d4 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-hlt.ll @@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s + +define void @foo() nounwind { +; CHECK-LABEL: foo +; CHECK: hlt #0x2 + tail call void @llvm.aarch64.hlt(i32 2) + ret void +} + +declare void @llvm.aarch64.hlt(i32 immarg) nounwind diff --git a/llvm/test/CodeGen/AArch64/asm-ld1-wrong-constraint.ll b/llvm/test/CodeGen/AArch64/asm-ld1-wrong-constraint.ll new file mode 100644 index 00000000000000..b62c18ccfbcd29 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/asm-ld1-wrong-constraint.ll @@ -0,0 +1,23 @@ +; RUN: not llc < %s -o - 2>&1 | FileCheck %s + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "arm64-apple-macosx" + 
+%struct.uint64x2x4_t = type { [4 x <2 x i64>] } + +define i64 @rdar130887714(ptr noundef %0) { + %2 = alloca ptr, align 8 + %3 = alloca %struct.uint64x2x4_t, align 16 + store ptr %0, ptr %2, align 8 + %4 = load ptr, ptr %2, align 8 + call void asm sideeffect "ld1 { $0.2d, ${0:T}.2d, ${0:U}.2d, ${0:V}.2d} , [$1]", "*w,r"(ptr elementtype(%struct.uint64x2x4_t) %3, ptr %4) #0, !srcloc !0 +; CHECK: error: Don't know how to handle indirect register inputs yet for constraint 'w' at line 250 + + %5 = getelementptr inbounds %struct.uint64x2x4_t, ptr %3, i32 0, i32 0 + %6 = getelementptr inbounds [4 x <2 x i64>], ptr %5, i64 0, i64 0 + %7 = load <2 x i64>, ptr %6, align 16 + %8 = extractelement <2 x i64> %7, i32 1 + ret i64 %8 +} + +!0 = !{i64 250} \ No newline at end of file diff --git a/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll b/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll index 6da98142573f60..cc6bd766eed78c 100644 --- a/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll +++ b/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll @@ -1,12 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "arm64" define i1 @test_EQ_IllEbT(i64 %a, i64 %b) { -; CHECK-LABEL: test_EQ_IllEbT -; CHECK: cmn x1, x0 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-LABEL: test_EQ_IllEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn x1, x0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret entry: %add = sub i64 0, %b %cmp = icmp eq i64 %add, %a @@ -14,10 +16,11 @@ entry: } define i1 @test_EQ_IliEbT(i64 %a, i32 %b) { -; CHECK-LABEL: test_EQ_IliEbT -; CHECK: cmn x0, w1, sxtw -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-LABEL: test_EQ_IliEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn x0, w1, sxtw +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret entry: %conv = sext i32 %b to i64 %add = sub i64 0, %a @@ 
-26,10 +29,12 @@ entry: } define i1 @test_EQ_IlsEbT(i64 %a, i16 %b) { -; CHECK-LABEL: test_EQ_IlsEbT -; CHECK: cmn x0, w1, sxth -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-LABEL: test_EQ_IlsEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: cmn x0, w1, sxth +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret entry: %conv = sext i16 %b to i64 %add = sub i64 0, %a @@ -38,10 +43,12 @@ entry: } define i1 @test_EQ_IlcEbT(i64 %a, i8 %b) { -; CHECK-LABEL: test_EQ_IlcEbT -; CHECK: cmn x0, w1, uxtb -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-LABEL: test_EQ_IlcEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: cmn x0, w1, uxtb +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret entry: %conv = zext i8 %b to i64 %add = sub i64 0, %a @@ -50,10 +57,11 @@ entry: } define i1 @test_EQ_IilEbT(i32 %a, i64 %b) { -; CHECK-LABEL: test_EQ_IilEbT -; CHECK: cmn x1, w0, sxtw -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-LABEL: test_EQ_IilEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn x1, w0, sxtw +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret entry: %conv = sext i32 %a to i64 %add = sub i64 0, %b @@ -62,10 +70,11 @@ entry: } define i1 @test_EQ_IiiEbT(i32 %a, i32 %b) { -; CHECK-LABEL: test_EQ_IiiEbT -; CHECK: cmn w1, w0 -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-LABEL: test_EQ_IiiEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn w1, w0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret entry: %add = sub i32 0, %b %cmp = icmp eq i32 %add, %a @@ -73,10 +82,11 @@ entry: } define i1 @test_EQ_IisEbT(i32 %a, i16 %b) { -; CHECK-LABEL: test_EQ_IisEbT -; CHECK: cmn w0, w1, sxth -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-LABEL: test_EQ_IisEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn w0, w1, sxth +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret entry: %conv = sext i16 %b to i32 %add = sub i32 0, %a @@ -85,10 +95,11 @@ entry: } define i1 
@test_EQ_IicEbT(i32 %a, i8 %b) { -; CHECK-LABEL: test_EQ_IicEbT -; CHECK: cmn w0, w1, uxtb -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-LABEL: test_EQ_IicEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn w0, w1, uxtb +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret entry: %conv = zext i8 %b to i32 %add = sub i32 0, %a @@ -97,10 +108,12 @@ entry: } define i1 @test_EQ_IslEbT(i16 %a, i64 %b) { -; CHECK-LABEL: test_EQ_IslEbT -; CHECK: cmn x1, w0, sxth -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-LABEL: test_EQ_IslEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: cmn x1, w0, sxth +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret entry: %conv = sext i16 %a to i64 %add = sub i64 0, %b @@ -109,10 +122,11 @@ entry: } define i1 @test_EQ_IsiEbT(i16 %a, i32 %b) { -; CHECK-LABEL: test_EQ_IsiEbT -; CHECK: cmn w1, w0, sxth -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-LABEL: test_EQ_IsiEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn w1, w0, sxth +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret entry: %conv = sext i16 %a to i32 %add = sub i32 0, %b @@ -121,11 +135,12 @@ entry: } define i1 @test_EQ_IssEbT(i16 %a, i16 %b) { -; CHECK-LABEL: test_EQ_IssEbT -; CHECK: sxth w8, w1 -; CHECK-NEXT: cmn w8, w0, sxth -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-LABEL: test_EQ_IssEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sxth w8, w1 +; CHECK-NEXT: cmn w8, w0, sxth +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret entry: %conv = sext i16 %a to i32 %conv1 = sext i16 %b to i32 @@ -135,11 +150,12 @@ entry: } define i1 @test_EQ_IscEbT(i16 %a, i8 %b) { -; CHECK-LABEL: test_EQ_IscEbT -; CHECK: and w8, w1, #0xff -; CHECK-NEXT: cmn w8, w0, sxth -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-LABEL: test_EQ_IscEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and w8, w1, #0xff +; CHECK-NEXT: cmn w8, w0, sxth +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret entry: %conv = sext i16 %a to i32 %conv1 = zext i8 
%b to i32 @@ -149,10 +165,12 @@ entry: } define i1 @test_EQ_IclEbT(i8 %a, i64 %b) { -; CHECK-LABEL: test_EQ_IclEbT -; CHECK: cmn x1, w0, uxtb -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-LABEL: test_EQ_IclEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: cmn x1, w0, uxtb +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret entry: %conv = zext i8 %a to i64 %add = sub i64 0, %b @@ -161,10 +179,11 @@ entry: } define i1 @test_EQ_IciEbT(i8 %a, i32 %b) { -; CHECK-LABEL: test_EQ_IciEbT -; CHECK: cmn w1, w0, uxtb -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-LABEL: test_EQ_IciEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn w1, w0, uxtb +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret entry: %conv = zext i8 %a to i32 %add = sub i32 0, %b @@ -173,11 +192,12 @@ entry: } define i1 @test_EQ_IcsEbT(i8 %a, i16 %b) { -; CHECK-LABEL: test_EQ_IcsEbT -; CHECK: sxth w8, w1 -; CHECK-NEXT: cmn w8, w0, uxtb -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-LABEL: test_EQ_IcsEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sxth w8, w1 +; CHECK-NEXT: cmn w8, w0, uxtb +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret entry: %conv = zext i8 %a to i32 %conv1 = sext i16 %b to i32 @@ -187,11 +207,12 @@ entry: } define i1 @test_EQ_IccEbT(i8 %a, i8 %b) { -; CHECK-LABEL: test_EQ_IccEbT -; CHECK: and w8, w1, #0xff -; CHECK-NEXT: cmn w8, w0, uxtb -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret +; CHECK-LABEL: test_EQ_IccEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and w8, w1, #0xff +; CHECK-NEXT: cmn w8, w0, uxtb +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret entry: %conv = zext i8 %a to i32 %conv1 = zext i8 %b to i32 @@ -201,10 +222,11 @@ entry: } define i1 @test_NE_IllEbT(i64 %a, i64 %b) { -; CHECK-LABEL: test_NE_IllEbT -; CHECK: cmn x1, x0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK-LABEL: test_NE_IllEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn x1, x0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret entry: %add 
= sub i64 0, %b %cmp = icmp ne i64 %add, %a @@ -212,10 +234,11 @@ entry: } define i1 @test_NE_IliEbT(i64 %a, i32 %b) { -; CHECK-LABEL: test_NE_IliEbT -; CHECK: cmn x0, w1, sxtw -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK-LABEL: test_NE_IliEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn x0, w1, sxtw +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret entry: %conv = sext i32 %b to i64 %add = sub i64 0, %a @@ -224,10 +247,12 @@ entry: } define i1 @test_NE_IlsEbT(i64 %a, i16 %b) { -; CHECK-LABEL: test_NE_IlsEbT -; CHECK: cmn x0, w1, sxth -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK-LABEL: test_NE_IlsEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: cmn x0, w1, sxth +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret entry: %conv = sext i16 %b to i64 %add = sub i64 0, %a @@ -236,10 +261,12 @@ entry: } define i1 @test_NE_IlcEbT(i64 %a, i8 %b) { -; CHECK-LABEL: test_NE_IlcEbT -; CHECK: cmn x0, w1, uxtb -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK-LABEL: test_NE_IlcEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: cmn x0, w1, uxtb +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret entry: %conv = zext i8 %b to i64 %add = sub i64 0, %a @@ -248,10 +275,11 @@ entry: } define i1 @test_NE_IilEbT(i32 %a, i64 %b) { -; CHECK-LABEL: test_NE_IilEbT -; CHECK: cmn x1, w0, sxtw -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK-LABEL: test_NE_IilEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn x1, w0, sxtw +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret entry: %conv = sext i32 %a to i64 %add = sub i64 0, %b @@ -260,10 +288,11 @@ entry: } define i1 @test_NE_IiiEbT(i32 %a, i32 %b) { -; CHECK-LABEL: test_NE_IiiEbT -; CHECK: cmn w1, w0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK-LABEL: test_NE_IiiEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn w1, w0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret entry: %add = sub i32 0, %b %cmp = icmp ne i32 %add, 
%a @@ -271,10 +300,11 @@ entry: } define i1 @test_NE_IisEbT(i32 %a, i16 %b) { -; CHECK-LABEL: test_NE_IisEbT -; CHECK: cmn w0, w1, sxth -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK-LABEL: test_NE_IisEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn w0, w1, sxth +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret entry: %conv = sext i16 %b to i32 %add = sub i32 0, %a @@ -283,10 +313,11 @@ entry: } define i1 @test_NE_IicEbT(i32 %a, i8 %b) { -; CHECK-LABEL: test_NE_IicEbT -; CHECK: cmn w0, w1, uxtb -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK-LABEL: test_NE_IicEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn w0, w1, uxtb +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret entry: %conv = zext i8 %b to i32 %add = sub i32 0, %a @@ -295,10 +326,12 @@ entry: } define i1 @test_NE_IslEbT(i16 %a, i64 %b) { -; CHECK-LABEL: test_NE_IslEbT -; CHECK: cmn x1, w0, sxth -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK-LABEL: test_NE_IslEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: cmn x1, w0, sxth +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret entry: %conv = sext i16 %a to i64 %add = sub i64 0, %b @@ -307,10 +340,11 @@ entry: } define i1 @test_NE_IsiEbT(i16 %a, i32 %b) { -; CHECK-LABEL: test_NE_IsiEbT -; CHECK: cmn w1, w0, sxth -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK-LABEL: test_NE_IsiEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn w1, w0, sxth +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret entry: %conv = sext i16 %a to i32 %add = sub i32 0, %b @@ -319,11 +353,12 @@ entry: } define i1 @test_NE_IssEbT(i16 %a, i16 %b) { -; CHECK-LABEL:test_NE_IssEbT -; CHECK: sxth w8, w1 -; CHECK-NEXT: cmn w8, w0, sxth -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK-LABEL: test_NE_IssEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sxth w8, w1 +; CHECK-NEXT: cmn w8, w0, sxth +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret entry: %conv = sext i16 %a to i32 %conv1 = sext i16 %b to i32 @@ -333,11 
+368,12 @@ entry: } define i1 @test_NE_IscEbT(i16 %a, i8 %b) { -; CHECK-LABEL:test_NE_IscEbT -; CHECK: and w8, w1, #0xff -; CHECK-NEXT: cmn w8, w0, sxth -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK-LABEL: test_NE_IscEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and w8, w1, #0xff +; CHECK-NEXT: cmn w8, w0, sxth +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret entry: %conv = sext i16 %a to i32 %conv1 = zext i8 %b to i32 @@ -347,10 +383,12 @@ entry: } define i1 @test_NE_IclEbT(i8 %a, i64 %b) { -; CHECK-LABEL:test_NE_IclEbT -; CHECK: cmn x1, w0, uxtb -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK-LABEL: test_NE_IclEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: cmn x1, w0, uxtb +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret entry: %conv = zext i8 %a to i64 %add = sub i64 0, %b @@ -359,10 +397,11 @@ entry: } define i1 @test_NE_IciEbT(i8 %a, i32 %b) { -; CHECK-LABEL:test_NE_IciEbT -; CHECK: cmn w1, w0, uxtb -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK-LABEL: test_NE_IciEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn w1, w0, uxtb +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret entry: %conv = zext i8 %a to i32 %add = sub i32 0, %b @@ -371,11 +410,12 @@ entry: } define i1 @test_NE_IcsEbT(i8 %a, i16 %b) { -; CHECK-LABEL:test_NE_IcsEbT -; CHECK: sxth w8, w1 -; CHECK-NEXT: cmn w8, w0, uxtb -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK-LABEL: test_NE_IcsEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sxth w8, w1 +; CHECK-NEXT: cmn w8, w0, uxtb +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret entry: %conv = zext i8 %a to i32 %conv1 = sext i16 %b to i32 @@ -385,11 +425,12 @@ entry: } define i1 @test_NE_IccEbT(i8 %a, i8 %b) { -; CHECK-LABEL:test_NE_IccEbT -; CHECK: and w8, w1, #0xff -; CHECK-NEXT: cmn w8, w0, uxtb -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK-LABEL: test_NE_IccEbT: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and w8, w1, #0xff +; CHECK-NEXT: cmn w8, w0, uxtb +; 
CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret entry: %conv = zext i8 %a to i32 %conv1 = zext i8 %b to i32 diff --git a/llvm/test/CodeGen/AArch64/ctlz_zero_undef.ll b/llvm/test/CodeGen/AArch64/ctlz_zero_undef.ll new file mode 100644 index 00000000000000..943ff72633ca63 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/ctlz_zero_undef.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s --mtriple=aarch64 | FileCheck %s + +declare i8 @llvm.ctlz.i8(i8, i1 immarg) +declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1 immarg) +declare i11 @llvm.ctlz.i11(i11, i1 immarg) + +define i32 @clz_nzu8(i8 %self) { +; CHECK-LABEL: clz_nzu8: +; CHECK: // %bb.0: // %start +; CHECK-NEXT: lsl w8, w0, #24 +; CHECK-NEXT: clz w0, w8 +; CHECK-NEXT: ret +start: + %ctlz_res = call i8 @llvm.ctlz.i8(i8 %self, i1 true) + %ret = zext i8 %ctlz_res to i32 + ret i32 %ret +} + +; non standard bit size argument to ctlz +define i32 @clz_nzu11(i11 %self) { +; CHECK-LABEL: clz_nzu11: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl w8, w0, #21 +; CHECK-NEXT: clz w0, w8 +; CHECK-NEXT: ret + %ctlz_res = call i11 @llvm.ctlz.i11(i11 %self, i1 true) + %ret = zext i11 %ctlz_res to i32 + ret i32 %ret +} + +; vector type argument to ctlz intrinsic +define <8 x i32> @clz_vec_nzu8(<8 x i8> %self) { +; CHECK-LABEL: clz_vec_nzu8: +; CHECK: // %bb.0: +; CHECK-NEXT: clz v0.8b, v0.8b +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ret + %ctlz_res = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %self, i1 true) + %ret = zext <8 x i8> %ctlz_res to <8 x i32> + ret <8 x i32> %ret +} diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll index 170ba7292ae608..cb85bbda80a803 100644 --- a/llvm/test/CodeGen/AArch64/neon-mov.ll +++ b/llvm/test/CodeGen/AArch64/neon-mov.ll @@ -109,29 +109,11 @@ define <4 x i32> @movi4s_lsl16() { } define <4 x i32> 
@movi4s_fneg() { -; CHECK-NOFP16-SD-LABEL: movi4s_fneg: -; CHECK-NOFP16-SD: // %bb.0: -; CHECK-NOFP16-SD-NEXT: movi v0.4s, #240, lsl #8 -; CHECK-NOFP16-SD-NEXT: fneg v0.4s, v0.4s -; CHECK-NOFP16-SD-NEXT: ret -; -; CHECK-FP16-SD-LABEL: movi4s_fneg: -; CHECK-FP16-SD: // %bb.0: -; CHECK-FP16-SD-NEXT: movi v0.4s, #240, lsl #8 -; CHECK-FP16-SD-NEXT: fneg v0.4s, v0.4s -; CHECK-FP16-SD-NEXT: ret -; -; CHECK-NOFP16-GI-LABEL: movi4s_fneg: -; CHECK-NOFP16-GI: // %bb.0: -; CHECK-NOFP16-GI-NEXT: movi v0.4s, #240, lsl #8 -; CHECK-NOFP16-GI-NEXT: fneg v0.4s, v0.4s -; CHECK-NOFP16-GI-NEXT: ret -; -; CHECK-FP16-GI-LABEL: movi4s_fneg: -; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: movi v0.4s, #240, lsl #8 -; CHECK-FP16-GI-NEXT: fneg v0.4s, v0.4s -; CHECK-FP16-GI-NEXT: ret +; CHECK-LABEL: movi4s_fneg: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.4s, #240, lsl #8 +; CHECK-NEXT: fneg v0.4s, v0.4s +; CHECK-NEXT: ret ret <4 x i32> } @@ -308,23 +290,17 @@ define <8 x i16> @mvni8h_neg() { ; CHECK-NOFP16-SD-NEXT: dup v0.8h, w8 ; CHECK-NOFP16-SD-NEXT: ret ; -; CHECK-FP16-SD-LABEL: mvni8h_neg: -; CHECK-FP16-SD: // %bb.0: -; CHECK-FP16-SD-NEXT: movi v0.8h, #240 -; CHECK-FP16-SD-NEXT: fneg v0.8h, v0.8h -; CHECK-FP16-SD-NEXT: ret +; CHECK-FP16-LABEL: mvni8h_neg: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: movi v0.8h, #240 +; CHECK-FP16-NEXT: fneg v0.8h, v0.8h +; CHECK-FP16-NEXT: ret ; ; CHECK-NOFP16-GI-LABEL: mvni8h_neg: ; CHECK-NOFP16-GI: // %bb.0: -; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI32_0 -; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI32_0] +; CHECK-NOFP16-GI-NEXT: mov w8, #-32528 // =0xffff80f0 +; CHECK-NOFP16-GI-NEXT: dup v0.8h, w8 ; CHECK-NOFP16-GI-NEXT: ret -; -; CHECK-FP16-GI-LABEL: mvni8h_neg: -; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: movi v0.8h, #240 -; CHECK-FP16-GI-NEXT: fneg v0.8h, v0.8h -; CHECK-FP16-GI-NEXT: ret ret <8 x i16> } @@ -494,29 +470,11 @@ define <2 x double> @fmov2d() { } define <2 x double> @fmov2d_neg0() { -; CHECK-NOFP16-SD-LABEL: fmov2d_neg0: -; 
CHECK-NOFP16-SD: // %bb.0: -; CHECK-NOFP16-SD-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NOFP16-SD-NEXT: fneg v0.2d, v0.2d -; CHECK-NOFP16-SD-NEXT: ret -; -; CHECK-FP16-SD-LABEL: fmov2d_neg0: -; CHECK-FP16-SD: // %bb.0: -; CHECK-FP16-SD-NEXT: movi v0.2d, #0000000000000000 -; CHECK-FP16-SD-NEXT: fneg v0.2d, v0.2d -; CHECK-FP16-SD-NEXT: ret -; -; CHECK-NOFP16-GI-LABEL: fmov2d_neg0: -; CHECK-NOFP16-GI: // %bb.0: -; CHECK-NOFP16-GI-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NOFP16-GI-NEXT: fneg v0.2d, v0.2d -; CHECK-NOFP16-GI-NEXT: ret -; -; CHECK-FP16-GI-LABEL: fmov2d_neg0: -; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: movi v0.2d, #0000000000000000 -; CHECK-FP16-GI-NEXT: fneg v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: ret +; CHECK-LABEL: fmov2d_neg0: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: fneg v0.2d, v0.2d +; CHECK-NEXT: ret ret <2 x double> } @@ -581,5 +539,4 @@ define <2 x i32> @movi1d() { ret <2 x i32> %1 } ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; CHECK-FP16: {{.*}} ; CHECK-NOFP16: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/sitofp-to-tbl.ll b/llvm/test/CodeGen/AArch64/sitofp-to-tbl.ll new file mode 100644 index 00000000000000..3e708b0678fbcd --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sitofp-to-tbl.ll @@ -0,0 +1,276 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -verify-machineinstrs < %s | FileCheck %s + +target triple = "aarch64-linux" + +; CHECK-LABEL: .LCPI0_0: +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 5 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .LCPI0_1: +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 3 + +define void @sitofp_v8i8_to_v8f32(ptr %src, ptr %dst) { +; CHECK-LABEL: sitofp_v8i8_to_v8f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, .LCPI0_0 +; CHECK-NEXT: adrp x9, .LCPI0_1 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI0_0] +; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI0_1] +; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: .LBB0_1: // %loop +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldr d2, [x0, x8, lsl #3] +; CHECK-NEXT: add x9, x1, x8, lsl #5 +; CHECK-NEXT: add x8, x8, #1 +; CHECK-NEXT: cmp x8, #1000 +; CHECK-NEXT: tbl v3.16b, { v2.16b }, v0.16b +; CHECK-NEXT: tbl v2.16b, { v2.16b }, v1.16b +; CHECK-NEXT: scvtf v3.4s, 
v3.4s, #24 +; CHECK-NEXT: scvtf v2.4s, v2.4s, #24 +; CHECK-NEXT: stp q2, q3, [x9] +; CHECK-NEXT: b.eq .LBB0_1 +; CHECK-NEXT: // %bb.2: // %exit +; CHECK-NEXT: ret +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.src = getelementptr inbounds <8 x i8>, ptr %src, i64 %iv + %l = load <8 x i8>, ptr %gep.src + %conv = sitofp <8 x i8> %l to <8 x float> + %gep.dst = getelementptr inbounds <8 x float>, ptr %dst, i64 %iv + store <8 x float> %conv, ptr %gep.dst + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 1000 + br i1 %ec, label %loop, label %exit + +exit: + ret void +} + +; CHECK-LABEL: .LCPI1_0: +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 12 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .LCPI1_1: +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .LCPI1_2: +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 5 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .LCPI1_3: +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: 
.byte 0 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 3 + +define void @sitofp_v16i8_to_v16f32(ptr %src, ptr %dst) { +; CHECK-LABEL: sitofp_v16i8_to_v16f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, .LCPI1_0 +; CHECK-NEXT: adrp x9, .LCPI1_1 +; CHECK-NEXT: adrp x10, .LCPI1_2 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: adrp x8, .LCPI1_3 +; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI1_1] +; CHECK-NEXT: ldr q2, [x10, :lo12:.LCPI1_2] +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI1_3] +; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: .LBB1_1: // %loop +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldr q4, [x0, x8, lsl #4] +; CHECK-NEXT: add x9, x1, x8, lsl #6 +; CHECK-NEXT: add x8, x8, #1 +; CHECK-NEXT: cmp x8, #1000 +; CHECK-NEXT: tbl v5.16b, { v4.16b }, v0.16b +; CHECK-NEXT: tbl v6.16b, { v4.16b }, v1.16b +; CHECK-NEXT: tbl v7.16b, { v4.16b }, v2.16b +; CHECK-NEXT: tbl v4.16b, { v4.16b }, v3.16b +; CHECK-NEXT: scvtf v5.4s, v5.4s, #24 +; CHECK-NEXT: scvtf v6.4s, v6.4s, #24 +; CHECK-NEXT: scvtf v7.4s, v7.4s, #24 +; CHECK-NEXT: scvtf v4.4s, v4.4s, #24 +; CHECK-NEXT: stp q6, q5, [x9, #32] +; CHECK-NEXT: stp q4, q7, [x9] +; CHECK-NEXT: b.eq .LBB1_1 +; CHECK-NEXT: // %bb.2: // %exit +; CHECK-NEXT: ret +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.src = getelementptr inbounds <16 x i8>, ptr %src, i64 %iv + %l = load <16 x i8>, ptr %gep.src + %conv = sitofp <16 x i8> %l to <16 x float> + %gep.dst = getelementptr inbounds <16 x float>, ptr %dst, i64 %iv + store <16 x float> %conv, ptr %gep.dst + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 1000 + br i1 %ec, label %loop, label %exit + +exit: + ret void +} + + +; Negative tests, currently we don't convert to 
f16/bf16 via `tbl`. +define void @sitofp_v8i8_to_v8f16(ptr %src, ptr %dst) { +; CHECK-LABEL: sitofp_v8i8_to_v8f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: .LBB2_1: // %loop +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldr d0, [x0, x8, lsl #3] +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0 +; CHECK-NEXT: scvtf v1.4s, v1.4s +; CHECK-NEXT: scvtf v0.4s, v0.4s +; CHECK-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NEXT: str q1, [x1, x8, lsl #4] +; CHECK-NEXT: add x8, x8, #1 +; CHECK-NEXT: cmp x8, #1000 +; CHECK-NEXT: b.eq .LBB2_1 +; CHECK-NEXT: // %bb.2: // %exit +; CHECK-NEXT: ret +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.src = getelementptr inbounds <8 x i8>, ptr %src, i64 %iv + %l = load <8 x i8>, ptr %gep.src + %conv = sitofp <8 x i8> %l to <8 x half> + %gep.dst = getelementptr inbounds <8 x half>, ptr %dst, i64 %iv + store <8 x half> %conv, ptr %gep.dst + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 1000 + br i1 %ec, label %loop, label %exit + +exit: + ret void +} + + +; Negative test, conversion to double with the help of `tbl` not implemented (TODO) +define void @sitofp_v2i8_to_v2f64(ptr %src, ptr %dst) { +; CHECK-LABEL: sitofp_v2i8_to_v2f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: .LBB3_1: // %loop +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: add x9, x0, x8, lsl #1 +; CHECK-NEXT: ldrsb w10, [x9] +; CHECK-NEXT: ldrsb w9, [x9, #1] +; CHECK-NEXT: fmov s0, w10 +; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-NEXT: scvtf v0.2d, v0.2d +; CHECK-NEXT: str q0, [x1, x8, lsl #4] +; CHECK-NEXT: add x8, x8, #1 +; CHECK-NEXT: cmp x8, #1000 +; CHECK-NEXT: b.eq .LBB3_1 +; CHECK-NEXT: // %bb.2: // %exit +; CHECK-NEXT: ret +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ 
%iv.next, %loop ] + %gep.src = getelementptr inbounds <2 x i8>, ptr %src, i64 %iv + %l = load <2 x i8>, ptr %gep.src + %conv = sitofp <2 x i8> %l to <2 x double> + %gep.dst = getelementptr inbounds <2 x double>, ptr %dst, i64 %iv + store <2 x double> %conv, ptr %gep.dst + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 1000 + br i1 %ec, label %loop, label %exit + +exit: + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll index 21832dc320e425..3f0b86c271538a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll @@ -169,30 +169,29 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX90A_GFX940-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY8]], [[COPY9]], implicit $exec ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] ; GFX90A_GFX940-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY10]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec - ; GFX90A_GFX940-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 + ; GFX90A_GFX940-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX90A_GFX940-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY2]], [[COPY11]], implicit-def dead $scc, implicit $exec - ; GFX90A_GFX940-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX90A_GFX940-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] + ; GFX90A_GFX940-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec ; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept 
V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec - ; GFX90A_GFX940-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] + ; GFX90A_GFX940-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec ; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec - ; GFX90A_GFX940-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] + ; GFX90A_GFX940-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec ; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec - ; GFX90A_GFX940-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] + ; GFX90A_GFX940-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec ; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec - ; GFX90A_GFX940-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] + ; GFX90A_GFX940-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec ; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec - ; GFX90A_GFX940-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] + ; 
GFX90A_GFX940-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec ; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec - ; GFX90A_GFX940-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 63 - ; GFX90A_GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_4]] + ; GFX90A_GFX940-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 63 + ; GFX90A_GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_3]] ; GFX90A_GFX940-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]] ; GFX90A_GFX940-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY18]], implicit $exec ; GFX90A_GFX940-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] @@ -200,7 +199,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX90A_GFX940-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A_GFX940-NEXT: S_BRANCH %bb.3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: bb.3 (%ir-block.35): + ; GFX90A_GFX940-NEXT: bb.3 (%ir-block.31): ; GFX90A_GFX940-NEXT: successors: %bb.4(0x80000000) ; GFX90A_GFX940-NEXT: {{ $}} ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec @@ -211,7 +210,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX90A_GFX940-NEXT: {{ $}} ; GFX90A_GFX940-NEXT: SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: bb.5 (%ir-block.37): + ; GFX90A_GFX940-NEXT: bb.5 (%ir-block.33): ; GFX90A_GFX940-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, 
implicit $exec ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll index 077aff46839a60..504735b4985ad5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll @@ -179,32 +179,31 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX90A-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY8]], [[COPY9]], implicit $exec ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] ; GFX90A-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY10]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec - ; GFX90A-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 + ; GFX90A-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX90A-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY2]], [[COPY11]], implicit-def dead $scc, implicit $exec - ; GFX90A-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] + ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX90A-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec ; GFX90A-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec - ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] + ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX90A-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, 
implicit $exec ; GFX90A-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec - ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] + ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX90A-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec ; GFX90A-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec - ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] + ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX90A-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec ; GFX90A-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec - ; GFX90A-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] + ; GFX90A-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX90A-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec ; GFX90A-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec - ; GFX90A-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] + ; GFX90A-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX90A-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec ; GFX90A-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec - ; GFX90A-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] + ; GFX90A-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY 
[[S_MOV_B32_2]] ; GFX90A-NEXT: [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY18]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec - ; GFX90A-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 63 - ; GFX90A-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_4]] + ; GFX90A-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 63 + ; GFX90A-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_3]] ; GFX90A-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]] ; GFX90A-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY19]], implicit $exec ; GFX90A-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] @@ -212,7 +211,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX90A-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_BRANCH %bb.3 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: bb.3 (%ir-block.36): + ; GFX90A-NEXT: bb.3 (%ir-block.32): ; GFX90A-NEXT: successors: %bb.5(0x80000000) ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec @@ -222,11 +221,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX90A-NEXT: bb.4.Flow: ; GFX90A-NEXT: successors: %bb.6(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI %43, %bb.5, [[DEF]], %bb.1 + ; GFX90A-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI %42, %bb.5, [[DEF]], %bb.1 ; GFX90A-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_BRANCH %bb.6 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: bb.5 (%ir-block.39): + ; GFX90A-NEXT: bb.5 (%ir-block.35): ; GFX90A-NEXT: successors: %bb.4(0x80000000) ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.3, [[DEF]], %bb.2 @@ -237,7 +236,7 @@ define amdgpu_ps 
float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX90A-NEXT: [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY21]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec ; GFX90A-NEXT: S_BRANCH %bb.4 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: bb.6 (%ir-block.46): + ; GFX90A-NEXT: bb.6 (%ir-block.40): ; GFX90A-NEXT: $vgpr0 = COPY [[PHI]] ; GFX90A-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -271,32 +270,31 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX940-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY8]], [[COPY9]], implicit $exec ; GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] ; GFX940-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY10]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec - ; GFX940-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 + ; GFX940-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX940-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX940-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY2]], [[COPY11]], implicit-def dead $scc, implicit $exec - ; GFX940-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX940-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] + ; GFX940-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec ; GFX940-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] + ; GFX940-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec ; GFX940-NEXT: 
[[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] + ; GFX940-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec ; GFX940-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] + ; GFX940-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec ; GFX940-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] + ; GFX940-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec ; GFX940-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] + ; GFX940-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec ; GFX940-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]] + ; GFX940-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] ; GFX940-NEXT: 
[[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY18]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec - ; GFX940-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 63 - ; GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_4]] + ; GFX940-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 63 + ; GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_3]] ; GFX940-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]] ; GFX940-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY19]], implicit $exec ; GFX940-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] @@ -304,7 +302,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX940-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX940-NEXT: S_BRANCH %bb.3 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: bb.3 (%ir-block.36): + ; GFX940-NEXT: bb.3 (%ir-block.32): ; GFX940-NEXT: successors: %bb.5(0x80000000) ; GFX940-NEXT: {{ $}} ; GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec @@ -314,11 +312,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX940-NEXT: bb.4.Flow: ; GFX940-NEXT: successors: %bb.6(0x80000000) ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI %42, %bb.5, [[DEF]], %bb.1 + ; GFX940-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI %41, %bb.5, [[DEF]], %bb.1 ; GFX940-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX940-NEXT: S_BRANCH %bb.6 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: bb.5 (%ir-block.39): + ; GFX940-NEXT: bb.5 (%ir-block.35): ; GFX940-NEXT: successors: %bb.4(0x80000000) ; GFX940-NEXT: {{ $}} ; GFX940-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.3, [[DEF]], %bb.2 @@ -329,7 +327,7 @@ define amdgpu_ps float 
@global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX940-NEXT: [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY21]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec ; GFX940-NEXT: S_BRANCH %bb.4 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: bb.6 (%ir-block.46): + ; GFX940-NEXT: bb.6 (%ir-block.40): ; GFX940-NEXT: $vgpr0 = COPY [[PHI]] ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -355,35 +353,34 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX11-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY4]], [[COPY5]], implicit $exec - ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 + ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; GFX11-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY2]], [[COPY6]], implicit-def dead $scc, implicit $exec - ; GFX11-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; GFX11-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY7]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec ; GFX11-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; GFX11-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY8]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec ; GFX11-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit 
$mode, implicit $exec - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; GFX11-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY9]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec ; GFX11-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; GFX11-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY10]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec ; GFX11-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX11-NEXT: [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_3]], 0, [[S_MOV_B32_3]], [[V_ADD_F32_e64_3]], 0, implicit $exec - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] + ; GFX11-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GFX11-NEXT: [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_2]], 0, [[S_MOV_B32_2]], [[V_ADD_F32_e64_3]], 0, implicit $exec + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; GFX11-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY11]], [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec ; GFX11-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], 
[[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec - ; GFX11-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 15 - ; GFX11-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], [[S_MOV_B32_4]] - ; GFX11-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX11-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 [[V_READLANE_B32_]], [[S_MOV_B32_5]], [[V_MOV_B32_dpp5]] - ; GFX11-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 31 - ; GFX11-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], [[S_MOV_B32_6]] + ; GFX11-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 15 + ; GFX11-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], [[S_MOV_B32_3]] + ; GFX11-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 16 + ; GFX11-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 [[V_READLANE_B32_]], [[S_MOV_B32_4]], [[V_MOV_B32_dpp5]] + ; GFX11-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 31 + ; GFX11-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], [[S_MOV_B32_5]] ; GFX11-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_1]] ; GFX11-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY13]], implicit $exec ; GFX11-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] @@ -391,7 +388,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX11-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX11-NEXT: S_BRANCH %bb.3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: bb.3 (%ir-block.36): + ; GFX11-NEXT: bb.3 (%ir-block.29): ; GFX11-NEXT: successors: %bb.5(0x80000000) ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec @@ -401,11 +398,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX11-NEXT: bb.4.Flow: ; GFX11-NEXT: successors: 
%bb.6(0x80000000) ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI %41, %bb.5, [[DEF]], %bb.1 + ; GFX11-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI %40, %bb.5, [[DEF]], %bb.1 ; GFX11-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX11-NEXT: S_BRANCH %bb.6 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: bb.5 (%ir-block.39): + ; GFX11-NEXT: bb.5 (%ir-block.32): ; GFX11-NEXT: successors: %bb.4(0x80000000) ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.3, [[DEF]], %bb.2 @@ -416,7 +413,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX11-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY15]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_BRANCH %bb.4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: bb.6 (%ir-block.47): + ; GFX11-NEXT: bb.6 (%ir-block.37): ; GFX11-NEXT: $vgpr0 = COPY [[PHI]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir index 85cfb9b320f150..68587630e21959 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir @@ -200,11 +200,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s64) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64) - ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[CTLZ_ZERO_UNDEF]], [[UV]] 
- ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[USUBO]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[C1]](s32) + ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[SHL]](s64) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTLZ_ZERO_UNDEF]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s33) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll index 8ee0ee3b27bae8..d3944d3d52d776 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll @@ -1058,13 +1058,13 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 -; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1 +; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v1 +; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v0 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_mul_hi_u32_u24_e32 v1, 5, v2 ; GFX7LESS-NEXT: v_mul_u32_u24_e32 v0, 5, v2 -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s5 -; GFX7LESS-NEXT: v_add_i32_e32 v0, vcc, s4, v0 +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s4 +; GFX7LESS-NEXT: v_add_i32_e32 v0, vcc, s5, v0 ; GFX7LESS-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc ; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX7LESS-NEXT: s_endpgm @@ -1095,10 +1095,10 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace ; GFX89-NEXT: .LBB3_2: ; GFX89-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX89-NEXT: s_waitcnt lgkmcnt(0) -; GFX89-NEXT: v_readfirstlane_b32 s2, v0 -; GFX89-NEXT: v_readfirstlane_b32 s3, v1 -; GFX89-NEXT: v_mov_b32_e32 v0, s2 -; GFX89-NEXT: v_mov_b32_e32 v1, s3 +; GFX89-NEXT: 
v_readfirstlane_b32 s2, v1 +; GFX89-NEXT: v_readfirstlane_b32 s3, v0 +; GFX89-NEXT: v_mov_b32_e32 v0, s3 +; GFX89-NEXT: v_mov_b32_e32 v1, s2 ; GFX89-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v2, 5, v[0:1] ; GFX89-NEXT: s_mov_b32 s3, 0xf000 ; GFX89-NEXT: s_mov_b32 s2, -1 @@ -1134,8 +1134,8 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v2, 5, s[2:3] ; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1064-NEXT: s_mov_b32 s2, -1 @@ -1169,8 +1169,8 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s2, v2, 5, s[2:3] ; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s2, -1 @@ -1205,8 +1205,8 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1164-NEXT: .LBB3_2: ; GFX1164-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1164-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1164-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164-NEXT: v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3] ; GFX1164-NEXT: s_mov_b32 s3, 0x31016000 @@ -1242,8 +1242,8 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1132-NEXT: .LBB3_2: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1132-NEXT: 
v_readfirstlane_b32 s3, v1 +; GFX1132-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-NEXT: v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3] ; GFX1132-NEXT: s_mov_b32 s3, 0x31016000 @@ -1281,8 +1281,8 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1264-NEXT: .LBB3_2: ; GFX1264-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1264-NEXT: s_wait_kmcnt 0x0 -; GFX1264-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1264-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1264-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1264-NEXT: v_mad_co_u64_u32 v[0:1], null, v2, 5, s[2:3] ; GFX1264-NEXT: s_mov_b32 s3, 0x31016000 @@ -1318,8 +1318,8 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1232-NEXT: .LBB3_2: ; GFX1232-NEXT: s_or_b32 exec_lo, exec_lo, s6 ; GFX1232-NEXT: s_wait_kmcnt 0x0 -; GFX1232-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1232-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1232-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1232-NEXT: v_mad_co_u64_u32 v[0:1], null, v2, 5, s[2:3] ; GFX1232-NEXT: s_mov_b32 s3, 0x31016000 @@ -1367,15 +1367,15 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_mov_b32 s7, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s6, -1 -; GFX7LESS-NEXT: v_readfirstlane_b32 s2, v0 -; GFX7LESS-NEXT: v_readfirstlane_b32 s3, v1 +; GFX7LESS-NEXT: v_readfirstlane_b32 s2, v1 +; GFX7LESS-NEXT: v_readfirstlane_b32 s3, v0 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_mul_lo_u32 v0, s1, v2 ; GFX7LESS-NEXT: v_mul_hi_u32 v1, s0, v2 ; GFX7LESS-NEXT: v_mul_lo_u32 v2, s0, v2 ; GFX7LESS-NEXT: v_add_i32_e32 v1, vcc, v1, v0 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s3 -; GFX7LESS-NEXT: v_add_i32_e32 v0, vcc, s2, v2 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s2 +; GFX7LESS-NEXT: v_add_i32_e32 v0, vcc, s3, v2 ; GFX7LESS-NEXT: v_addc_u32_e32 
v1, vcc, v3, v1, vcc ; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX7LESS-NEXT: s_endpgm @@ -1407,10 +1407,10 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX8-NEXT: buffer_wbinvl1_vol ; GFX8-NEXT: .LBB4_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX8-NEXT: v_readfirstlane_b32 s2, v0 -; GFX8-NEXT: v_readfirstlane_b32 s3, v1 -; GFX8-NEXT: v_mov_b32_e32 v0, s2 -; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: v_readfirstlane_b32 s2, v1 +; GFX8-NEXT: v_readfirstlane_b32 s3, v0 +; GFX8-NEXT: v_mov_b32_e32 v0, s3 +; GFX8-NEXT: v_mov_b32_e32 v1, s2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mul_lo_u32 v3, s1, v2 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s0, v2, v[0:1] @@ -1449,10 +1449,10 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: .LBB4_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: v_readfirstlane_b32 s1, v1 -; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_readfirstlane_b32 s0, v1 +; GFX9-NEXT: v_readfirstlane_b32 s1, v0 +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v2, v[0:1] ; GFX9-NEXT: s_mov_b32 s7, 0xf000 @@ -1493,8 +1493,8 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1064-NEXT: .LBB4_2: ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1064-NEXT: v_readfirstlane_b32 s0, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1064-NEXT: v_readfirstlane_b32 s0, v0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1064-NEXT: s_mov_b32 s6, -1 @@ -1534,8 +1534,8 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1032-NEXT: .LBB4_2: ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; 
GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX1032-NEXT: v_readfirstlane_b32 s0, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1032-NEXT: v_readfirstlane_b32 s0, v0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s6, -1 @@ -1576,8 +1576,8 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: .LBB4_2: ; GFX1164-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX1164-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1164-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1164-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1164-NEXT: s_mov_b32 s6, -1 @@ -1622,8 +1622,8 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: .LBB4_2: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s2 -; GFX1132-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1132-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1132-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1132-NEXT: s_mov_b32 s6, -1 @@ -1666,8 +1666,8 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1264-NEXT: global_inv scope:SCOPE_DEV ; GFX1264-NEXT: .LBB4_2: ; GFX1264-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX1264-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1264-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1264-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1264-NEXT: s_wait_kmcnt 0x0 ; GFX1264-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1264-NEXT: s_mov_b32 s6, -1 @@ -1706,8 +1706,8 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1232-NEXT: global_inv scope:SCOPE_DEV ; GFX1232-NEXT: .LBB4_2: ; GFX1232-NEXT: s_or_b32 exec_lo, exec_lo, s8 -; GFX1232-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1232-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1232-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1232-NEXT: 
s_wait_kmcnt 0x0 ; GFX1232-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1232-NEXT: s_mov_b32 s6, -1 @@ -2925,13 +2925,13 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out, ptr addrspace ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 -; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1 +; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v1 +; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v0 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_mul_hi_u32_u24_e32 v1, 5, v2 ; GFX7LESS-NEXT: v_mul_u32_u24_e32 v0, 5, v2 -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s5 -; GFX7LESS-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s4 +; GFX7LESS-NEXT: v_sub_i32_e32 v0, vcc, s5, v0 ; GFX7LESS-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc ; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX7LESS-NEXT: s_endpgm @@ -2961,12 +2961,12 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out, ptr addrspace ; GFX8-NEXT: buffer_wbinvl1_vol ; GFX8-NEXT: .LBB9_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX8-NEXT: v_readfirstlane_b32 s4, v0 -; GFX8-NEXT: v_readfirstlane_b32 s5, v1 +; GFX8-NEXT: v_readfirstlane_b32 s4, v1 +; GFX8-NEXT: v_readfirstlane_b32 s5, v0 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, 5, v2 ; GFX8-NEXT: v_mul_hi_u32_u24_e32 v1, 5, v2 -; GFX8-NEXT: v_mov_b32_e32 v2, s5 -; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v0 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s5, v0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 @@ -2999,12 +2999,12 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out, ptr addrspace ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: .LBB9_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX9-NEXT: v_readfirstlane_b32 s4, v0 -; GFX9-NEXT: v_readfirstlane_b32 s5, v1 +; GFX9-NEXT: v_readfirstlane_b32 s4, v1 +; GFX9-NEXT: v_readfirstlane_b32 s5, v0 
; GFX9-NEXT: v_mul_u32_u24_e32 v0, 5, v2 ; GFX9-NEXT: v_mul_hi_u32_u24_e32 v1, 5, v2 -; GFX9-NEXT: v_mov_b32_e32 v2, s5 -; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s4, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, s4 +; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s5, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 @@ -3291,15 +3291,15 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_mov_b32 s7, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s6, -1 -; GFX7LESS-NEXT: v_readfirstlane_b32 s2, v0 -; GFX7LESS-NEXT: v_readfirstlane_b32 s3, v1 +; GFX7LESS-NEXT: v_readfirstlane_b32 s2, v1 +; GFX7LESS-NEXT: v_readfirstlane_b32 s3, v0 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_mul_lo_u32 v0, s1, v2 ; GFX7LESS-NEXT: v_mul_hi_u32 v1, s0, v2 ; GFX7LESS-NEXT: v_mul_lo_u32 v2, s0, v2 ; GFX7LESS-NEXT: v_add_i32_e32 v1, vcc, v1, v0 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s3 -; GFX7LESS-NEXT: v_sub_i32_e32 v0, vcc, s2, v2 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s2 +; GFX7LESS-NEXT: v_sub_i32_e32 v0, vcc, s3, v2 ; GFX7LESS-NEXT: v_subb_u32_e32 v1, vcc, v3, v1, vcc ; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX7LESS-NEXT: s_endpgm @@ -3334,11 +3334,11 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mul_lo_u32 v4, s1, v2 ; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s0, v2, 0 -; GFX8-NEXT: v_readfirstlane_b32 s0, v0 -; GFX8-NEXT: v_readfirstlane_b32 s1, v1 +; GFX8-NEXT: v_readfirstlane_b32 s0, v1 +; GFX8-NEXT: v_readfirstlane_b32 s1, v0 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v3, v4 -; GFX8-NEXT: v_mov_b32_e32 v3, s1 -; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s0, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, s0 +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s1, v2 ; GFX8-NEXT: s_mov_b32 s7, 0xf000 ; GFX8-NEXT: s_mov_b32 s6, -1 ; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v3, v1, vcc @@ -3379,11 +3379,11 @@ 
define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s3, v2, v[4:5] -; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: v_readfirstlane_b32 s0, v1 +; GFX9-NEXT: v_readfirstlane_b32 s1, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, v4 -; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s0, v3 +; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s1, v3 ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v1, vcc ; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX9-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll index af6f69130910d0..b0b40aa952a9fb 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll @@ -944,12 +944,12 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 -; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1 +; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v1 +; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v0 ; GFX7LESS-NEXT: v_mul_hi_u32_u24_e32 v1, 5, v2 ; GFX7LESS-NEXT: v_mul_u32_u24_e32 v0, 5, v2 -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s5 -; GFX7LESS-NEXT: v_add_i32_e32 v0, vcc, s4, v0 +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s4 +; GFX7LESS-NEXT: v_add_i32_e32 v0, vcc, s5, v0 ; GFX7LESS-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -974,11 +974,11 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB4_2: ; GFX8-NEXT: 
s_or_b64 exec, exec, s[2:3] -; GFX8-NEXT: v_readfirstlane_b32 s2, v0 -; GFX8-NEXT: v_readfirstlane_b32 s3, v1 +; GFX8-NEXT: v_readfirstlane_b32 s2, v1 +; GFX8-NEXT: v_readfirstlane_b32 s3, v0 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: v_mov_b32_e32 v0, s2 -; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: v_mov_b32_e32 v0, s3 +; GFX8-NEXT: v_mov_b32_e32 v1, s2 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v2, 5, v[0:1] ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 @@ -1005,11 +1005,11 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB4_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX9-NEXT: v_readfirstlane_b32 s2, v0 -; GFX9-NEXT: v_readfirstlane_b32 s3, v1 +; GFX9-NEXT: v_readfirstlane_b32 s2, v1 +; GFX9-NEXT: v_readfirstlane_b32 s3, v0 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-NEXT: v_mov_b32_e32 v1, s2 ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v2, 5, v[0:1] ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 @@ -1039,8 +1039,8 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) { ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v2, 5, s[2:3] ; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1064-NEXT: s_mov_b32 s2, -1 @@ -1068,8 +1068,8 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) { ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1032-NEXT: 
v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s2, v2, 5, s[2:3] ; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s2, -1 @@ -1099,8 +1099,8 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) { ; GFX1164-NEXT: .LBB4_2: ; GFX1164-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 -; GFX1164-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1164-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1164-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164-NEXT: v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3] ; GFX1164-NEXT: s_mov_b32 s3, 0x31016000 @@ -1132,8 +1132,8 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) { ; GFX1132-NEXT: .LBB4_2: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 -; GFX1132-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1132-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1132-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-NEXT: v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3] ; GFX1132-NEXT: s_mov_b32 s3, 0x31016000 @@ -1182,14 +1182,14 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, i64 %additive) ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_mov_b32 s4, s0 ; GFX7LESS-NEXT: s_mov_b32 s5, s1 -; GFX7LESS-NEXT: v_readfirstlane_b32 s0, v0 -; GFX7LESS-NEXT: v_readfirstlane_b32 s1, v1 +; GFX7LESS-NEXT: v_readfirstlane_b32 s0, v1 +; GFX7LESS-NEXT: v_readfirstlane_b32 s1, v0 ; GFX7LESS-NEXT: v_mul_lo_u32 v0, s3, v2 ; GFX7LESS-NEXT: v_mul_hi_u32 v1, s2, v2 ; GFX7LESS-NEXT: v_mul_lo_u32 v2, s2, v2 ; GFX7LESS-NEXT: v_add_i32_e32 v1, vcc, v1, v0 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s1 -; GFX7LESS-NEXT: v_add_i32_e32 v0, vcc, s0, v2 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s0 +; GFX7LESS-NEXT: v_add_i32_e32 v0, vcc, s1, v2 ; GFX7LESS-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc ; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX7LESS-NEXT: 
s_endpgm @@ -1217,10 +1217,10 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, i64 %additive) ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB5_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX8-NEXT: v_readfirstlane_b32 s4, v0 -; GFX8-NEXT: v_readfirstlane_b32 s5, v1 -; GFX8-NEXT: v_mov_b32_e32 v0, s4 -; GFX8-NEXT: v_mov_b32_e32 v1, s5 +; GFX8-NEXT: v_readfirstlane_b32 s4, v1 +; GFX8-NEXT: v_readfirstlane_b32 s5, v0 +; GFX8-NEXT: v_mov_b32_e32 v0, s5 +; GFX8-NEXT: v_mov_b32_e32 v1, s4 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mul_lo_u32 v3, s3, v2 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[2:3], s2, v2, v[0:1] @@ -1256,10 +1256,10 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, i64 %additive) ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB5_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX9-NEXT: v_readfirstlane_b32 s4, v0 -; GFX9-NEXT: v_readfirstlane_b32 s5, v1 -; GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_readfirstlane_b32 s4, v1 +; GFX9-NEXT: v_readfirstlane_b32 s5, v0 +; GFX9-NEXT: v_mov_b32_e32 v0, s5 +; GFX9-NEXT: v_mov_b32_e32 v1, s4 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s2, v2, v[0:1] ; GFX9-NEXT: s_mov_b32 s7, 0xf000 @@ -1296,8 +1296,8 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, i64 %additive) ; GFX1064-NEXT: .LBB5_2: ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX1064-NEXT: v_readfirstlane_b32 s4, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s5, v1 +; GFX1064-NEXT: v_readfirstlane_b32 s4, v0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s2, v2, s[4:5] ; GFX1064-NEXT: v_mad_u64_u32 v[1:2], s[2:3], s3, v2, v[1:2] @@ -1331,8 +1331,8 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, i64 %additive) ; GFX1032-NEXT: .LBB5_2: ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; 
GFX1032-NEXT: v_readfirstlane_b32 s4, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s5, v1 +; GFX1032-NEXT: v_readfirstlane_b32 s4, v0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s2, s2, v2, s[4:5] ; GFX1032-NEXT: v_mad_u64_u32 v[1:2], s2, s3, v2, v[1:2] @@ -1367,8 +1367,8 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, i64 %additive) ; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: .LBB5_2: ; GFX1164-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX1164-NEXT: v_readfirstlane_b32 s4, v0 ; GFX1164-NEXT: v_readfirstlane_b32 s5, v1 +; GFX1164-NEXT: v_readfirstlane_b32 s4, v0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1164-NEXT: v_mad_u64_u32 v[0:1], null, s2, v2, s[4:5] @@ -1407,8 +1407,8 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, i64 %additive) ; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: .LBB5_2: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX1132-NEXT: v_readfirstlane_b32 s4, v0 ; GFX1132-NEXT: v_readfirstlane_b32 s5, v1 +; GFX1132-NEXT: v_readfirstlane_b32 s4, v0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1132-NEXT: v_mad_u64_u32 v[0:1], null, s2, v2, s[4:5] @@ -2444,12 +2444,12 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 -; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1 +; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v1 +; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v0 ; GFX7LESS-NEXT: v_mul_hi_u32_u24_e32 v1, 5, v2 ; GFX7LESS-NEXT: v_mul_u32_u24_e32 v0, 5, v2 -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s5 -; GFX7LESS-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s4 +; GFX7LESS-NEXT: v_sub_i32_e32 v0, vcc, s5, v0 ; GFX7LESS-NEXT: 
v_subb_u32_e32 v1, vcc, v2, v1, vcc ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -2475,12 +2475,12 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB11_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: v_readfirstlane_b32 s4, v0 -; GFX8-NEXT: v_readfirstlane_b32 s5, v1 +; GFX8-NEXT: v_readfirstlane_b32 s4, v1 +; GFX8-NEXT: v_readfirstlane_b32 s5, v0 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, 5, v2 ; GFX8-NEXT: v_mul_hi_u32_u24_e32 v1, 5, v2 -; GFX8-NEXT: v_mov_b32_e32 v2, s5 -; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v0 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s5, v0 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc @@ -2507,12 +2507,12 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB11_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: v_readfirstlane_b32 s4, v0 -; GFX9-NEXT: v_readfirstlane_b32 s5, v1 +; GFX9-NEXT: v_readfirstlane_b32 s4, v1 +; GFX9-NEXT: v_readfirstlane_b32 s5, v0 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, 5, v2 ; GFX9-NEXT: v_mul_hi_u32_u24_e32 v1, 5, v2 -; GFX9-NEXT: v_mov_b32_e32 v2, s5 -; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s4, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, s4 +; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s5, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v1, vcc @@ -2696,14 +2696,14 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, i64 %subitive) ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_mov_b32 s4, s0 ; GFX7LESS-NEXT: s_mov_b32 s5, s1 -; GFX7LESS-NEXT: v_readfirstlane_b32 s0, v0 -; GFX7LESS-NEXT: v_readfirstlane_b32 s1, v1 +; GFX7LESS-NEXT: v_readfirstlane_b32 s0, v1 +; GFX7LESS-NEXT: v_readfirstlane_b32 s1, v0 ; 
GFX7LESS-NEXT: v_mul_lo_u32 v0, s3, v2 ; GFX7LESS-NEXT: v_mul_hi_u32 v1, s2, v2 ; GFX7LESS-NEXT: v_mul_lo_u32 v2, s2, v2 ; GFX7LESS-NEXT: v_add_i32_e32 v1, vcc, v1, v0 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s1 -; GFX7LESS-NEXT: v_sub_i32_e32 v0, vcc, s0, v2 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s0 +; GFX7LESS-NEXT: v_sub_i32_e32 v0, vcc, s1, v2 ; GFX7LESS-NEXT: v_subb_u32_e32 v1, vcc, v3, v1, vcc ; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX7LESS-NEXT: s_endpgm @@ -2736,11 +2736,11 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, i64 %subitive) ; GFX8-NEXT: s_mov_b32 s5, s1 ; GFX8-NEXT: v_mul_lo_u32 v4, s3, v2 ; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s2, v2, 0 -; GFX8-NEXT: v_readfirstlane_b32 s0, v0 -; GFX8-NEXT: v_readfirstlane_b32 s1, v1 +; GFX8-NEXT: v_readfirstlane_b32 s0, v1 +; GFX8-NEXT: v_readfirstlane_b32 s1, v0 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v3, v4 -; GFX8-NEXT: v_mov_b32_e32 v3, s1 -; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s0, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, s0 +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s1, v2 ; GFX8-NEXT: s_mov_b32 s7, 0xf000 ; GFX8-NEXT: s_mov_b32 s6, -1 ; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v3, v1, vcc @@ -2776,11 +2776,11 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, i64 %subitive) ; GFX9-NEXT: s_mov_b32 s4, s0 ; GFX9-NEXT: s_mov_b32 s5, s1 ; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[2:3], s3, v2, v[4:5] -; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: v_readfirstlane_b32 s0, v1 +; GFX9-NEXT: v_readfirstlane_b32 s1, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, v4 -; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s0, v3 +; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s1, v3 ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v1, vcc @@ -4149,8 +4149,8 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1 +; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7LESS-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX7LESS-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc ; GFX7LESS-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -4249,8 +4249,8 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) { ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc ; GFX1064-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc ; GFX1064-NEXT: v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1] @@ -4280,8 +4280,8 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) { ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo ; GFX1032-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc_lo ; GFX1032-NEXT: v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[0:1] @@ -4312,8 +4312,8 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) { ; GFX1164-NEXT: .LBB18_2: ; GFX1164-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 -; GFX1164-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1164-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1164-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1164-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc ; GFX1164-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -4345,8 +4345,8 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) 
%out) { ; GFX1132-NEXT: .LBB18_2: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 -; GFX1132-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1132-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1132-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1132-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo ; GFX1132-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -4663,8 +4663,8 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1 +; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7LESS-NEXT: v_bfrev_b32_e32 v1, -2 ; GFX7LESS-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc ; GFX7LESS-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -4763,8 +4763,8 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) { ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_cndmask_b32_e64 v1, 0, 0x7fffffff, vcc ; GFX1064-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc ; GFX1064-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1] @@ -4794,8 +4794,8 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) { ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_cndmask_b32_e64 v1, 0, 0x7fffffff, vcc_lo ; GFX1032-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc_lo ; GFX1032-NEXT: v_cmp_lt_i64_e32 vcc_lo, s[2:3], v[0:1] @@ -4826,8 +4826,8 @@ define amdgpu_kernel void 
@min_i64_constant(ptr addrspace(1) %out) { ; GFX1164-NEXT: .LBB20_2: ; GFX1164-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 -; GFX1164-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1164-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1164-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1164-NEXT: v_cndmask_b32_e64 v1, 0, 0x7fffffff, vcc ; GFX1164-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -4859,8 +4859,8 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) { ; GFX1132-NEXT: .LBB20_2: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 -; GFX1132-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1132-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1132-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1132-NEXT: v_cndmask_b32_e64 v1, 0, 0x7fffffff, vcc_lo ; GFX1132-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -5177,8 +5177,8 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1 +; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 ; GFX7LESS-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc ; GFX7LESS-NEXT: v_mov_b32_e32 v2, s4 @@ -5208,8 +5208,8 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB22_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: v_readfirstlane_b32 s5, v1 +; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc ; GFX8-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1] @@ -5240,8 +5240,8 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB22_2: ; 
GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: v_readfirstlane_b32 s5, v1 +; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc ; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1] @@ -5274,8 +5274,8 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc ; GFX1064-NEXT: v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1] @@ -5305,8 +5305,8 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc_lo ; GFX1032-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[0:1] @@ -5337,8 +5337,8 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX1164-NEXT: .LBB22_2: ; GFX1164-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 -; GFX1164-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1164-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1164-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1164-NEXT: v_mov_b32_e32 v1, 0 ; GFX1164-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -5370,8 +5370,8 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX1132-NEXT: .LBB22_2: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132-NEXT: 
s_load_b64 s[0:1], s[0:1], 0x24 -; GFX1132-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1132-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1132-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1132-NEXT: v_mov_b32_e32 v1, 0 ; GFX1132-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -5688,8 +5688,8 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1 +; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7LESS-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc ; GFX7LESS-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc ; GFX7LESS-NEXT: v_mov_b32_e32 v2, s5 @@ -5719,8 +5719,8 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB24_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: v_readfirstlane_b32 s5, v1 +; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc ; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[0:1] @@ -5751,8 +5751,8 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB24_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: v_readfirstlane_b32 s5, v1 +; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc ; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[0:1] @@ -5785,8 +5785,8 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; 
GFX1064-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc ; GFX1064-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc ; GFX1064-NEXT: v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1] @@ -5816,8 +5816,8 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo ; GFX1032-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc_lo ; GFX1032-NEXT: v_cmp_lt_u64_e32 vcc_lo, s[2:3], v[0:1] @@ -5848,8 +5848,8 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX1164-NEXT: .LBB24_2: ; GFX1164-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 -; GFX1164-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1164-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1164-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1164-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc ; GFX1164-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -5881,8 +5881,8 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX1132-NEXT: .LBB24_2: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 -; GFX1132-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1132-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1132-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1132-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo ; GFX1132-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) diff --git a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll index 756b8190996827..d269eb680138bb 100644 --- a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll +++ b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll @@ -408,9 
+408,8 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i16_with_select(ptr addrspace(1) no ; VI-NEXT: s_load_dword s2, s[0:1], 0x2c ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_and_b32 s2, s2, 0xffff +; VI-NEXT: s_lshl_b32 s2, s2, 16 ; VI-NEXT: s_flbit_i32_b32 s2, s2 -; VI-NEXT: s_add_i32 s2, s2, -16 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s2 @@ -712,8 +711,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i16_with_select(ptr addrspace(1) no ; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: v_ffbh_u32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 -; VI-NEXT: v_add_u32_e32 v1, vcc, -16, v1 +; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v0 +; VI-NEXT: v_ffbh_u32_e32 v1, v1 ; VI-NEXT: v_cmp_ne_u16_e32 vcc, 0, v0 ; VI-NEXT: v_cndmask_b32_e32 v2, 32, v1, vcc ; VI-NEXT: v_mov_b32_e32 v0, s0 @@ -2168,18 +2167,15 @@ define i7 @v_ctlz_zero_undef_i7(i7 %val) { ; SI-LABEL: v_ctlz_zero_undef_i7: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_and_b32_e32 v0, 0x7f, v0 +; SI-NEXT: v_lshlrev_b32_e32 v0, 25, v0 ; SI-NEXT: v_ffbh_u32_e32 v0, v0 -; SI-NEXT: v_subrev_i32_e32 v0, vcc, 25, v0 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_ctlz_zero_undef_i7: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_and_b32_e32 v0, 0x7f, v0 -; VI-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 -; VI-NEXT: v_add_u32_e32 v0, vcc, -16, v0 -; VI-NEXT: v_add_u16_e32 v0, -9, v0 +; VI-NEXT: v_lshlrev_b32_e32 v0, 25, v0 +; VI-NEXT: v_ffbh_u32_e32 v0, v0 ; VI-NEXT: s_setpc_b64 s[30:31] ; ; EG-LABEL: v_ctlz_zero_undef_i7: @@ -2204,9 +2200,8 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i18(ptr addrspace(1) noalias %out, ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; SI-NEXT: s_mov_b32 s3, 0xf000 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; 
SI-NEXT: s_and_b32 s2, s2, 0x3ffff -; SI-NEXT: s_flbit_i32_b32 s2, s2 -; SI-NEXT: s_add_i32 s4, s2, -14 +; SI-NEXT: s_lshl_b32 s2, s2, 14 +; SI-NEXT: s_flbit_i32_b32 s4, s2 ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: s_bfe_u32 s4, s4, 0x20010 @@ -2221,15 +2216,14 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i18(ptr addrspace(1) noalias %out, ; VI-NEXT: s_load_dword s2, s[0:1], 0x2c ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_and_b32 s2, s2, 0x3ffff -; VI-NEXT: s_flbit_i32_b32 s2, s2 +; VI-NEXT: s_lshl_b32 s2, s2, 14 ; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: s_add_i32 s2, s2, -14 +; VI-NEXT: s_flbit_i32_b32 s2, s2 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v2, s2 ; VI-NEXT: s_add_u32 s0, s0, 2 -; VI-NEXT: flat_store_short v[0:1], v2 +; VI-NEXT: v_mov_b32_e32 v2, s2 ; VI-NEXT: s_addc_u32 s1, s1, 0 +; VI-NEXT: flat_store_short v[0:1], v2 ; VI-NEXT: s_bfe_u32 s2, s2, 0x20010 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 @@ -2239,20 +2233,18 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i18(ptr addrspace(1) noalias %out, ; ; EG-LABEL: s_ctlz_zero_undef_i18: ; EG: ; %bb.0: -; EG-NEXT: ALU 30, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 28, @4, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT MSKOR T1.XW, T3.X ; EG-NEXT: MEM_RAT MSKOR T0.XW, T2.X ; EG-NEXT: CF_END ; EG-NEXT: ALU clause starting at 4: -; EG-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x, -; EG-NEXT: 262143(3.673406e-40), 0(0.000000e+00) +; EG-NEXT: LSHL * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 14(1.961818e-44), 0(0.000000e+00) ; EG-NEXT: FFBH_UINT T0.W, PV.W, ; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.x, ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) -; EG-NEXT: ADD_INT * T0.W, PV.W, literal.x, -; EG-NEXT: -14(nan), 0(0.000000e+00) ; EG-NEXT: AND_INT T2.W, PV.W, literal.x, -; EG-NEXT: LSHL * T1.W, T1.W, literal.y, +; EG-NEXT: LSHL * T1.W, PS, literal.y, ; EG-NEXT: 65535(9.183409e-41), 3(4.203895e-45) ; 
EG-NEXT: LSHL T1.X, PV.W, PS, ; EG-NEXT: LSHL * T1.W, literal.x, PS, @@ -2300,17 +2292,15 @@ define i18 @v_ctlz_zero_undef_i18(i18 %val) { ; SI-LABEL: v_ctlz_zero_undef_i18: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_and_b32_e32 v0, 0x3ffff, v0 +; SI-NEXT: v_lshlrev_b32_e32 v0, 14, v0 ; SI-NEXT: v_ffbh_u32_e32 v0, v0 -; SI-NEXT: v_add_i32_e32 v0, vcc, -14, v0 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_ctlz_zero_undef_i18: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_and_b32_e32 v0, 0x3ffff, v0 +; VI-NEXT: v_lshlrev_b32_e32 v0, 14, v0 ; VI-NEXT: v_ffbh_u32_e32 v0, v0 -; VI-NEXT: v_add_u32_e32 v0, vcc, -14, v0 ; VI-NEXT: s_setpc_b64 s[30:31] ; ; EG-LABEL: v_ctlz_zero_undef_i18: @@ -2332,23 +2322,19 @@ define <2 x i18> @v_ctlz_zero_undef_v2i18(<2 x i18> %val) { ; SI-LABEL: v_ctlz_zero_undef_v2i18: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_and_b32_e32 v1, 0x3ffff, v1 -; SI-NEXT: v_and_b32_e32 v0, 0x3ffff, v0 +; SI-NEXT: v_lshlrev_b32_e32 v0, 14, v0 +; SI-NEXT: v_lshlrev_b32_e32 v1, 14, v1 ; SI-NEXT: v_ffbh_u32_e32 v0, v0 ; SI-NEXT: v_ffbh_u32_e32 v1, v1 -; SI-NEXT: v_add_i32_e32 v0, vcc, -14, v0 -; SI-NEXT: v_add_i32_e32 v1, vcc, -14, v1 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_ctlz_zero_undef_v2i18: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_and_b32_e32 v1, 0x3ffff, v1 -; VI-NEXT: v_and_b32_e32 v0, 0x3ffff, v0 +; VI-NEXT: v_lshlrev_b32_e32 v0, 14, v0 +; VI-NEXT: v_lshlrev_b32_e32 v1, 14, v1 ; VI-NEXT: v_ffbh_u32_e32 v0, v0 ; VI-NEXT: v_ffbh_u32_e32 v1, v1 -; VI-NEXT: v_add_u32_e32 v0, vcc, -14, v0 -; VI-NEXT: v_add_u32_e32 v1, vcc, -14, v1 ; VI-NEXT: s_setpc_b64 s[30:31] ; ; EG-LABEL: v_ctlz_zero_undef_v2i18: @@ -2383,11 +2369,11 @@ define <2 x i16> @v_ctlz_zero_undef_v2i16(<2 x i16> %val) { ; VI-LABEL: v_ctlz_zero_undef_v2i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: 
v_ffbh_u32_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 -; VI-NEXT: v_add_u32_e32 v0, vcc, -16, v0 -; VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; VI-NEXT: v_add_u32_e32 v0, vcc, 0xfff00000, v0 +; VI-NEXT: v_and_b32_e32 v1, 0xffff0000, v0 +; VI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; VI-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-NEXT: v_ffbh_u32_e32 v0, v0 +; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; VI-NEXT: s_setpc_b64 s[30:31] ; ; EG-LABEL: v_ctlz_zero_undef_v2i16: @@ -2429,13 +2415,13 @@ define <3 x i16> @v_ctlz_zero_undef_v3i16(<3 x i16> %val) { ; VI-LABEL: v_ctlz_zero_undef_v3i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_ffbh_u32_sdwa v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 -; VI-NEXT: v_add_u32_e32 v0, vcc, -16, v0 -; VI-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 -; VI-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; VI-NEXT: v_add_u32_e32 v1, vcc, -16, v1 -; VI-NEXT: v_add_u32_e32 v0, vcc, 0xfff00000, v0 +; VI-NEXT: v_lshlrev_b32_e32 v2, 16, v0 +; VI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; VI-NEXT: v_ffbh_u32_e32 v2, v2 +; VI-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-NEXT: v_ffbh_u32_e32 v1, v1 +; VI-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; VI-NEXT: s_setpc_b64 s[30:31] ; ; EG-LABEL: v_ctlz_zero_undef_v3i16: @@ -2483,16 +2469,16 @@ define <4 x i16> @v_ctlz_zero_undef_v4i16(<4 x i16> %val) { ; VI-LABEL: 
v_ctlz_zero_undef_v4i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_ffbh_u32_sdwa v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 -; VI-NEXT: v_ffbh_u32_sdwa v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 -; VI-NEXT: v_add_u32_e32 v1, vcc, -16, v1 -; VI-NEXT: v_add_u32_e32 v0, vcc, -16, v0 -; VI-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; VI-NEXT: v_add_u32_e32 v0, vcc, 0xfff00000, v0 -; VI-NEXT: v_add_u32_e32 v1, vcc, 0xfff00000, v1 +; VI-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; VI-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; VI-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; VI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; VI-NEXT: v_ffbh_u32_e32 v2, v2 +; VI-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-NEXT: v_ffbh_u32_e32 v3, v3 +; VI-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; VI-NEXT: s_setpc_b64 s[30:31] ; ; EG-LABEL: v_ctlz_zero_undef_v4i16: @@ -2567,28 +2553,19 @@ define <2 x i7> @v_ctlz_zero_undef_v2i7(<2 x i7> %val) { ; SI-LABEL: v_ctlz_zero_undef_v2i7: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_and_b32_e32 v1, 0x7f, v1 -; SI-NEXT: v_and_b32_e32 v0, 0x7f, v0 +; SI-NEXT: v_lshlrev_b32_e32 v0, 25, v0 +; SI-NEXT: v_lshlrev_b32_e32 v1, 25, v1 ; SI-NEXT: v_ffbh_u32_e32 v0, v0 ; SI-NEXT: v_ffbh_u32_e32 v1, v1 -; SI-NEXT: v_subrev_i32_e32 v0, vcc, 25, v0 -; SI-NEXT: 
v_subrev_i32_e32 v1, vcc, 25, v1 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_ctlz_zero_undef_v2i7: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_and_b32_e32 v2, 0x7f007f, v0 -; VI-NEXT: v_bfe_u32 v0, v0, 16, 7 +; VI-NEXT: v_lshlrev_b32_e32 v0, 25, v0 +; VI-NEXT: v_lshlrev_b32_e32 v1, 25, v1 ; VI-NEXT: v_ffbh_u32_e32 v0, v0 -; VI-NEXT: v_add_u32_e32 v0, vcc, -16, v0 -; VI-NEXT: v_add_u16_e32 v1, -9, v0 -; VI-NEXT: v_and_b32_e32 v0, 0x7f, v2 -; VI-NEXT: v_ffbh_u32_e32 v0, v0 -; VI-NEXT: v_add_u32_e32 v0, vcc, -16, v0 -; VI-NEXT: v_add_u16_e32 v0, -9, v0 +; VI-NEXT: v_ffbh_u32_e32 v1, v1 ; VI-NEXT: s_setpc_b64 s[30:31] ; ; EG-LABEL: v_ctlz_zero_undef_v2i7: diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll index ee0910b21f0245..60c3328b08c6c9 100644 --- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll @@ -170,9 +170,8 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX908-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 killed [[COPY6]], [[COPY7]], implicit $exec ; GFX908-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 killed [[COPY5]], killed [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec - ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 - ; GFX908-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec + ; GFX908-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], 
[[V_MOV_B32_e32_]], implicit-def dead $scc, implicit $exec ; GFX908-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec ; GFX908-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec ; GFX908-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec @@ -185,14 +184,14 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX908-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec ; GFX908-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec ; GFX908-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec - ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63 - ; GFX908-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_2]] + ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 63 + ; GFX908-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]] ; GFX908-NEXT: early-clobber %1:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec ; GFX908-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GFX908-NEXT: S_BRANCH %bb.2 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: bb.2 (%ir-block.35): + ; GFX908-NEXT: bb.2 (%ir-block.31): ; GFX908-NEXT: successors: 
%bb.3(0x80000000) ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec @@ -204,7 +203,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: SI_END_CF [[SI_IF1]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: bb.4 (%ir-block.37): + ; GFX908-NEXT: bb.4 (%ir-block.33): ; GFX908-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GFX908-NEXT: S_ENDPGM 0 ; @@ -232,9 +231,8 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX90A_GFX940-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 killed [[COPY6]], [[COPY7]], implicit $exec ; GFX90A_GFX940-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 killed [[COPY5]], killed [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec - ; GFX90A_GFX940-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 - ; GFX90A_GFX940-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec + ; GFX90A_GFX940-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], [[V_MOV_B32_e32_]], implicit-def dead $scc, implicit $exec ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec ; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit 
$exec @@ -247,14 +245,14 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec ; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec - ; GFX90A_GFX940-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63 - ; GFX90A_GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_2]] + ; GFX90A_GFX940-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 63 + ; GFX90A_GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]] ; GFX90A_GFX940-NEXT: early-clobber %1:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec ; GFX90A_GFX940-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec ; GFX90A_GFX940-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GFX90A_GFX940-NEXT: S_BRANCH %bb.2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: bb.2 (%ir-block.35): + ; GFX90A_GFX940-NEXT: bb.2 (%ir-block.31): ; GFX90A_GFX940-NEXT: successors: %bb.3(0x80000000) ; GFX90A_GFX940-NEXT: {{ $}} ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec @@ -266,7 +264,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX90A_GFX940-NEXT: {{ $}} ; GFX90A_GFX940-NEXT: SI_END_CF [[SI_IF1]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GFX90A_GFX940-NEXT: 
{{ $}} - ; GFX90A_GFX940-NEXT: bb.4 (%ir-block.37): + ; GFX90A_GFX940-NEXT: bb.4 (%ir-block.33): ; GFX90A_GFX940-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GFX90A_GFX940-NEXT: S_ENDPGM 0 ; @@ -290,9 +288,8 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX11_GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $exec_lo ; GFX11_GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX11_GFX12-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY4]], [[S_MOV_B32_]], implicit $exec - ; GFX11_GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 - ; GFX11_GFX12-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec ; GFX11_GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec + ; GFX11_GFX12-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], [[V_MOV_B32_e32_]], implicit-def dead $scc, implicit $exec ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 353, 15, 15, 0, implicit $exec ; GFX11_GFX12-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 354, 15, 15, 0, implicit $exec @@ -301,15 +298,15 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX11_GFX12-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 360, 15, 15, 0, implicit $exec ; GFX11_GFX12-NEXT: 
[[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec - ; GFX11_GFX12-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX11_GFX12-NEXT: [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_2]], 0, [[S_MOV_B32_2]], [[V_ADD_F32_e64_3]], 0, implicit $exec + ; GFX11_GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GFX11_GFX12-NEXT: [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_1]], 0, [[S_MOV_B32_1]], [[V_ADD_F32_e64_3]], 0, implicit $exec ; GFX11_GFX12-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_PERMLANEX16_B32_e64_]], 0, 0, implicit $mode, implicit $exec ; GFX11_GFX12-NEXT: early-clobber %1:vgpr_32 = STRICT_WWM killed [[V_ADD_F32_e64_4]], implicit $exec ; GFX11_GFX12-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_LO_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec ; GFX11_GFX12-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GFX11_GFX12-NEXT: S_BRANCH %bb.2 ; GFX11_GFX12-NEXT: {{ $}} - ; GFX11_GFX12-NEXT: bb.2 (%ir-block.28): + ; GFX11_GFX12-NEXT: bb.2 (%ir-block.24): ; GFX11_GFX12-NEXT: successors: %bb.3(0x80000000) ; GFX11_GFX12-NEXT: {{ $}} ; GFX11_GFX12-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec @@ -320,7 +317,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX11_GFX12-NEXT: {{ $}} ; GFX11_GFX12-NEXT: SI_END_CF [[SI_IF1]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GFX11_GFX12-NEXT: {{ $}} - ; GFX11_GFX12-NEXT: bb.4 (%ir-block.30): + ; GFX11_GFX12-NEXT: bb.4 (%ir-block.26): ; GFX11_GFX12-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def 
dead $scc, implicit $exec ; GFX11_GFX12-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll index d4dee983d4fc00..1b49b68fc5ba3d 100644 --- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll @@ -180,9 +180,8 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX90A-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 killed [[COPY6]], [[COPY7]], implicit $exec ; GFX90A-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 killed [[COPY5]], killed [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec - ; GFX90A-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 - ; GFX90A-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec ; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec + ; GFX90A-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], [[V_MOV_B32_e32_]], implicit-def dead $scc, implicit $exec ; GFX90A-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec ; GFX90A-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec ; GFX90A-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec @@ -196,15 +195,15 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX90A-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 
[[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec ; GFX90A-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec ; GFX90A-NEXT: [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec - ; GFX90A-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63 - ; GFX90A-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_2]] + ; GFX90A-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 63 + ; GFX90A-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]] ; GFX90A-NEXT: early-clobber %2:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec ; GFX90A-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; GFX90A-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GFX90A-NEXT: S_BRANCH %bb.2 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: bb.2 (%ir-block.36): + ; GFX90A-NEXT: bb.2 (%ir-block.32): ; GFX90A-NEXT: successors: %bb.4(0x80000000) ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec @@ -219,17 +218,17 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX90A-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GFX90A-NEXT: S_BRANCH %bb.5 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: bb.4 (%ir-block.39): + ; GFX90A-NEXT: bb.4 (%ir-block.35): ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[DEF1]], %bb.1, [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.2 ; GFX90A-NEXT: SI_END_CF [[SI_IF1]], implicit-def dead 
$exec, implicit-def dead $scc, implicit $exec ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PHI1]], implicit $exec - ; GFX90A-NEXT: early-clobber %45:vgpr_32 = STRICT_WWM [[V_MOV_B32_dpp6]], implicit $exec - ; GFX90A-NEXT: [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_READFIRSTLANE_B32_]], 0, killed %45, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: early-clobber %44:vgpr_32 = STRICT_WWM [[V_MOV_B32_dpp6]], implicit $exec + ; GFX90A-NEXT: [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_READFIRSTLANE_B32_]], 0, killed %44, 0, 0, implicit $mode, implicit $exec ; GFX90A-NEXT: S_BRANCH %bb.3 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: bb.5 (%ir-block.46): + ; GFX90A-NEXT: bb.5 (%ir-block.40): ; GFX90A-NEXT: $vgpr0 = COPY [[PHI]] ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; @@ -258,9 +257,8 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX940-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 killed [[COPY6]], [[COPY7]], implicit $exec ; GFX940-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 killed [[COPY5]], killed [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec - ; GFX940-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 - ; GFX940-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec ; GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec + ; GFX940-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], [[V_MOV_B32_e32_]], implicit-def dead $scc, implicit $exec ; GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec ; GFX940-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept 
V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec ; GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec @@ -274,15 +272,15 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec ; GFX940-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec ; GFX940-NEXT: [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec - ; GFX940-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63 - ; GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_2]] + ; GFX940-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 63 + ; GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]] ; GFX940-NEXT: early-clobber %2:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec ; GFX940-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec ; GFX940-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; GFX940-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GFX940-NEXT: S_BRANCH %bb.2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: bb.2 (%ir-block.36): + ; GFX940-NEXT: bb.2 (%ir-block.32): ; GFX940-NEXT: successors: %bb.4(0x80000000) ; GFX940-NEXT: {{ $}} ; GFX940-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec @@ -297,17 +295,17 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX940-NEXT: SI_END_CF 
[[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GFX940-NEXT: S_BRANCH %bb.5 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: bb.4 (%ir-block.39): + ; GFX940-NEXT: bb.4 (%ir-block.35): ; GFX940-NEXT: successors: %bb.3(0x80000000) ; GFX940-NEXT: {{ $}} ; GFX940-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[DEF1]], %bb.1, [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.2 ; GFX940-NEXT: SI_END_CF [[SI_IF1]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PHI1]], implicit $exec - ; GFX940-NEXT: early-clobber %44:vgpr_32 = STRICT_WWM [[V_MOV_B32_dpp6]], implicit $exec - ; GFX940-NEXT: [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_READFIRSTLANE_B32_]], 0, killed %44, 0, 0, implicit $mode, implicit $exec + ; GFX940-NEXT: early-clobber %43:vgpr_32 = STRICT_WWM [[V_MOV_B32_dpp6]], implicit $exec + ; GFX940-NEXT: [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_READFIRSTLANE_B32_]], 0, killed %43, 0, 0, implicit $mode, implicit $exec ; GFX940-NEXT: S_BRANCH %bb.3 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: bb.5 (%ir-block.46): + ; GFX940-NEXT: bb.5 (%ir-block.40): ; GFX940-NEXT: $vgpr0 = COPY [[PHI]] ; GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; @@ -332,9 +330,8 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $exec_lo ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX11-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY4]], [[S_MOV_B32_]], implicit $exec - ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 - ; GFX11-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec + ; GFX11-NEXT: 
[[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], [[V_MOV_B32_e32_]], implicit-def dead $scc, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec ; GFX11-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec @@ -343,24 +340,24 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX11-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec ; GFX11-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX11-NEXT: [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_2]], 0, [[S_MOV_B32_2]], [[V_ADD_F32_e64_3]], 0, implicit $exec + ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GFX11-NEXT: [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_1]], 0, [[S_MOV_B32_1]], [[V_ADD_F32_e64_3]], 0, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], killed [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec ; GFX11-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: 
[[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec - ; GFX11-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 15 - ; GFX11-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], killed [[S_MOV_B32_3]] - ; GFX11-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX11-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed [[V_READLANE_B32_]], killed [[S_MOV_B32_4]], [[V_MOV_B32_dpp5]] - ; GFX11-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 31 - ; GFX11-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], killed [[S_MOV_B32_5]] + ; GFX11-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 15 + ; GFX11-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], killed [[S_MOV_B32_2]] + ; GFX11-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 16 + ; GFX11-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed [[V_READLANE_B32_]], killed [[S_MOV_B32_3]], [[V_MOV_B32_dpp5]] + ; GFX11-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 31 + ; GFX11-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], killed [[S_MOV_B32_4]] ; GFX11-NEXT: early-clobber %2:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_1]], implicit $exec ; GFX11-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_LO_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec ; GFX11-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; GFX11-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GFX11-NEXT: S_BRANCH %bb.2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: bb.2 (%ir-block.36): + ; GFX11-NEXT: bb.2 (%ir-block.29): ; GFX11-NEXT: successors: %bb.4(0x80000000) ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec @@ -375,17 +372,17 @@ define amdgpu_ps float 
@global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX11-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GFX11-NEXT: S_BRANCH %bb.5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: bb.4 (%ir-block.39): + ; GFX11-NEXT: bb.4 (%ir-block.32): ; GFX11-NEXT: successors: %bb.3(0x80000000) ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[DEF1]], %bb.1, [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.2 ; GFX11-NEXT: SI_END_CF [[SI_IF1]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PHI1]], implicit $exec - ; GFX11-NEXT: early-clobber %44:vgpr_32 = STRICT_WWM [[V_WRITELANE_B32_]], implicit $exec - ; GFX11-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_READFIRSTLANE_B32_]], 0, killed %44, 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: early-clobber %43:vgpr_32 = STRICT_WWM [[V_WRITELANE_B32_]], implicit $exec + ; GFX11-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_READFIRSTLANE_B32_]], 0, killed %43, 0, 0, implicit $mode, implicit $exec ; GFX11-NEXT: S_BRANCH %bb.3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: bb.5 (%ir-block.47): + ; GFX11-NEXT: bb.5 (%ir-block.37): ; GFX11-NEXT: $vgpr0 = COPY [[PHI]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic diff --git a/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll b/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll index 791862dcae8ddc..18142c108ed58c 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll @@ -9,7 +9,7 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 { ; IR-ITERATIVE-LABEL: 
@global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe( ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() -; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]] +; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP16:%.*]] ; IR-ITERATIVE: 2: ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -20,44 +20,37 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_agent_scope_uns ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: -; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP29:%.*]] syncscope("agent") monotonic, align 4 +; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP22:%.*]] syncscope("agent") monotonic, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32 -; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP14]]) -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float -; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = fadd float [[TMP16]], [[TMP28:%.*]] -; IR-ITERATIVE-NEXT: br label [[TMP18]] -; IR-ITERATIVE: 18: -; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ] -; IR-ITERATIVE-NEXT: ret float [[TMP19]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) +; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = fadd float [[TMP14]], [[TMP21:%.*]] +; IR-ITERATIVE-NEXT: br label [[TMP16]] +; IR-ITERATIVE: 16: +; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP15]], [[TMP12]] ] +; 
IR-ITERATIVE-NEXT: ret float [[TMP17]] ; IR-ITERATIVE: ComputeLoop: -; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP29]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP28]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP32:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP20]] to i32 -; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP22]], i32 [[TMP21]]) -; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float -; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = bitcast float [[ACCUMULATOR]] to i32 -; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = bitcast float [[OLDVALUEPHI]] to i32 -; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[TMP25]], i32 [[TMP21]], i32 [[TMP26]]) -; IR-ITERATIVE-NEXT: [[TMP28]] = bitcast i32 [[TMP27]] to float -; IR-ITERATIVE-NEXT: [[TMP29]] = fadd float [[ACCUMULATOR]], [[TMP24]] -; IR-ITERATIVE-NEXT: [[TMP30:%.*]] = shl i64 1, [[TMP20]] -; IR-ITERATIVE-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], -1 -; IR-ITERATIVE-NEXT: [[TMP32]] = and i64 [[ACTIVEBITS]], [[TMP31]] -; IR-ITERATIVE-NEXT: [[TMP33:%.*]] = icmp eq i64 [[TMP32]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP33]], label [[COMPUTEEND]], label [[COMPUTELOOP]] +; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP21]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP25:%.*]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = 
trunc i64 [[TMP18]] to i32 +; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP19]]) +; IR-ITERATIVE-NEXT: [[TMP21]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP19]], float [[OLDVALUEPHI]]) +; IR-ITERATIVE-NEXT: [[TMP22]] = fadd float [[ACCUMULATOR]], [[TMP20]] +; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = shl i64 1, [[TMP18]] +; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = xor i64 [[TMP23]], -1 +; IR-ITERATIVE-NEXT: [[TMP25]] = and i64 [[ACTIVEBITS]], [[TMP24]] +; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP26]], label [[COMPUTEEND]], label [[COMPUTELOOP]] ; IR-ITERATIVE: ComputeEnd: -; IR-ITERATIVE-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP34]], label [[TMP10]], label [[TMP12]] +; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[TMP10]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe( ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() -; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP40:%.*]] +; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]] ; IR-DPP: 2: ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -65,43 +58,36 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_agent_scope_uns ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) -; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -2147483648) -; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 
[[TMP9]] to float -; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP14:%.*]] = fadd float [[TMP11]], [[TMP13]] -; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP16:%.*]] = fadd float [[TMP14]], [[TMP15]] -; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP18:%.*]] = fadd float [[TMP16]], [[TMP17]] -; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP20:%.*]] = fadd float [[TMP18]], [[TMP19]] -; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP22:%.*]] = fadd float [[TMP20]], [[TMP21]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP24:%.*]] = fadd float [[TMP22]], [[TMP23]] -; IR-DPP-NEXT: [[TMP25:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP24]], i32 312, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP26:%.*]] = bitcast float [[TMP24]] to i32 -; IR-DPP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP26]], i32 63) -; IR-DPP-NEXT: [[TMP28:%.*]] = bitcast i32 [[TMP27]] to float -; IR-DPP-NEXT: [[TMP29:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP28]]) -; IR-DPP-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-DPP-NEXT: br i1 [[TMP30]], label [[TMP31:%.*]], label [[TMP33:%.*]] -; IR-DPP: 31: -; IR-DPP-NEXT: [[TMP32:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP29]] syncscope("agent") monotonic, align 4 +; 
IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP11:%.*]] = fadd float [[TMP9]], [[TMP10]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP13:%.*]] = fadd float [[TMP11]], [[TMP12]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP15:%.*]] = fadd float [[TMP13]], [[TMP14]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP19:%.*]] = fadd float [[TMP17]], [[TMP18]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP21:%.*]] = fadd float [[TMP19]], [[TMP20]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) +; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] +; IR-DPP: 26: +; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4 +; IR-DPP-NEXT: br label [[TMP28]] +; 
IR-DPP: 28: +; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] +; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) +; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) +; IR-DPP-NEXT: [[TMP32:%.*]] = fadd float [[TMP30]], [[TMP31]] ; IR-DPP-NEXT: br label [[TMP33]] ; IR-DPP: 33: -; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP32]], [[TMP31]] ] -; IR-DPP-NEXT: [[TMP35:%.*]] = bitcast float [[TMP34]] to i32 -; IR-DPP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP35]]) -; IR-DPP-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float -; IR-DPP-NEXT: [[TMP38:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]]) -; IR-DPP-NEXT: [[TMP39:%.*]] = fadd float [[TMP37]], [[TMP38]] -; IR-DPP-NEXT: br label [[TMP40]] -; IR-DPP: 40: -; IR-DPP-NEXT: [[TMP41:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP39]], [[TMP33]] ] -; IR-DPP-NEXT: ret float [[TMP41]] +; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP32]], [[TMP28]] ] +; IR-DPP-NEXT: ret float [[TMP34]] ; %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4 ret float %result @@ -110,7 +96,7 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_agent_scope_uns define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) #0 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe( ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() -; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]] +; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP16:%.*]] ; IR-ITERATIVE: 2: ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -121,44 +107,37 
@@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_scope_agent_sco ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: -; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP29:%.*]] syncscope("agent") monotonic, align 4 +; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP22:%.*]] syncscope("agent") monotonic, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32 -; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP14]]) -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float -; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = fadd float [[TMP16]], [[TMP28:%.*]] -; IR-ITERATIVE-NEXT: br label [[TMP18]] -; IR-ITERATIVE: 18: -; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ] -; IR-ITERATIVE-NEXT: ret float [[TMP19]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) +; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = fadd float [[TMP14]], [[TMP21:%.*]] +; IR-ITERATIVE-NEXT: br label [[TMP16]] +; IR-ITERATIVE: 16: +; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP15]], [[TMP12]] ] +; IR-ITERATIVE-NEXT: ret float [[TMP17]] ; IR-ITERATIVE: ComputeLoop: -; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP29]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP28]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP32:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call i64 @llvm.cttz.i64(i64 
[[ACTIVEBITS]], i1 true) -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP20]] to i32 -; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP22]], i32 [[TMP21]]) -; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float -; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = bitcast float [[ACCUMULATOR]] to i32 -; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = bitcast float [[OLDVALUEPHI]] to i32 -; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[TMP25]], i32 [[TMP21]], i32 [[TMP26]]) -; IR-ITERATIVE-NEXT: [[TMP28]] = bitcast i32 [[TMP27]] to float -; IR-ITERATIVE-NEXT: [[TMP29]] = fadd float [[ACCUMULATOR]], [[TMP24]] -; IR-ITERATIVE-NEXT: [[TMP30:%.*]] = shl i64 1, [[TMP20]] -; IR-ITERATIVE-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], -1 -; IR-ITERATIVE-NEXT: [[TMP32]] = and i64 [[ACTIVEBITS]], [[TMP31]] -; IR-ITERATIVE-NEXT: [[TMP33:%.*]] = icmp eq i64 [[TMP32]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP33]], label [[COMPUTEEND]], label [[COMPUTELOOP]] +; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP21]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP25:%.*]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i32 +; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP19]]) +; IR-ITERATIVE-NEXT: [[TMP21]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP19]], float [[OLDVALUEPHI]]) +; IR-ITERATIVE-NEXT: [[TMP22]] = fadd float [[ACCUMULATOR]], [[TMP20]] +; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = shl i64 1, [[TMP18]] +; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = xor i64 [[TMP23]], -1 +; 
IR-ITERATIVE-NEXT: [[TMP25]] = and i64 [[ACTIVEBITS]], [[TMP24]] +; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP26]], label [[COMPUTEEND]], label [[COMPUTELOOP]] ; IR-ITERATIVE: ComputeEnd: -; IR-ITERATIVE-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP34]], label [[TMP10]], label [[TMP12]] +; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[TMP10]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe( ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() -; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP40:%.*]] +; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]] ; IR-DPP: 2: ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -166,43 +145,36 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_scope_agent_sco ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) -; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -2147483648) -; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float -; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP14:%.*]] = fadd float [[TMP11]], [[TMP13]] -; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP16:%.*]] = fadd float [[TMP14]], [[TMP15]] -; IR-DPP-NEXT: 
[[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP18:%.*]] = fadd float [[TMP16]], [[TMP17]] -; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP20:%.*]] = fadd float [[TMP18]], [[TMP19]] -; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP22:%.*]] = fadd float [[TMP20]], [[TMP21]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP24:%.*]] = fadd float [[TMP22]], [[TMP23]] -; IR-DPP-NEXT: [[TMP25:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP24]], i32 312, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP26:%.*]] = bitcast float [[TMP24]] to i32 -; IR-DPP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP26]], i32 63) -; IR-DPP-NEXT: [[TMP28:%.*]] = bitcast i32 [[TMP27]] to float -; IR-DPP-NEXT: [[TMP29:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP28]]) -; IR-DPP-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-DPP-NEXT: br i1 [[TMP30]], label [[TMP31:%.*]], label [[TMP33:%.*]] -; IR-DPP: 31: -; IR-DPP-NEXT: [[TMP32:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP29]] syncscope("agent") monotonic, align 4 +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP11:%.*]] = fadd float [[TMP9]], [[TMP10]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 
false) +; IR-DPP-NEXT: [[TMP13:%.*]] = fadd float [[TMP11]], [[TMP12]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP15:%.*]] = fadd float [[TMP13]], [[TMP14]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP19:%.*]] = fadd float [[TMP17]], [[TMP18]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP21:%.*]] = fadd float [[TMP19]], [[TMP20]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) +; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] +; IR-DPP: 26: +; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4 +; IR-DPP-NEXT: br label [[TMP28]] +; IR-DPP: 28: +; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] +; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) +; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) +; IR-DPP-NEXT: [[TMP32:%.*]] = fadd float [[TMP30]], [[TMP31]] ; IR-DPP-NEXT: br label [[TMP33]] ; IR-DPP: 33: -; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP2]] ], 
[ [[TMP32]], [[TMP31]] ] -; IR-DPP-NEXT: [[TMP35:%.*]] = bitcast float [[TMP34]] to i32 -; IR-DPP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP35]]) -; IR-DPP-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float -; IR-DPP-NEXT: [[TMP38:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]]) -; IR-DPP-NEXT: [[TMP39:%.*]] = fadd float [[TMP37]], [[TMP38]] -; IR-DPP-NEXT: br label [[TMP40]] -; IR-DPP: 40: -; IR-DPP-NEXT: [[TMP41:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP39]], [[TMP33]] ] -; IR-DPP-NEXT: ret float [[TMP41]] +; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP32]], [[TMP28]] ] +; IR-DPP-NEXT: ret float [[TMP34]] ; %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4 ret float %result @@ -211,7 +183,7 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_scope_agent_sco define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp( ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7:[0-9]+]] -; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]] +; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP16:%.*]] ; IR-ITERATIVE: 2: ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -222,44 +194,37 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_one_as_scope_un ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: -; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP29:%.*]] syncscope("one-as") monotonic, align 4 +; IR-ITERATIVE-NEXT: 
[[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP22:%.*]] syncscope("one-as") monotonic, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32 -; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP14]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float -; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP28:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: br label [[TMP18]] -; IR-ITERATIVE: 18: -; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ] -; IR-ITERATIVE-NEXT: ret float [[TMP19]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP21:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: br label [[TMP16]] +; IR-ITERATIVE: 16: +; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP15]], [[TMP12]] ] +; IR-ITERATIVE-NEXT: ret float [[TMP17]] ; IR-ITERATIVE: ComputeLoop: -; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP29]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP28]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP32:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP20]] to i32 -; IR-ITERATIVE-NEXT: [[TMP22:%.*]] 
= bitcast float [[VAL:%.*]] to i32 -; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP22]], i32 [[TMP21]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float -; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = bitcast float [[ACCUMULATOR]] to i32 -; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = bitcast float [[OLDVALUEPHI]] to i32 -; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[TMP25]], i32 [[TMP21]], i32 [[TMP26]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP28]] = bitcast i32 [[TMP27]] to float -; IR-ITERATIVE-NEXT: [[TMP29]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP24]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP30:%.*]] = shl i64 1, [[TMP20]] -; IR-ITERATIVE-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], -1 -; IR-ITERATIVE-NEXT: [[TMP32]] = and i64 [[ACTIVEBITS]], [[TMP31]] -; IR-ITERATIVE-NEXT: [[TMP33:%.*]] = icmp eq i64 [[TMP32]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP33]], label [[COMPUTEEND]], label [[COMPUTELOOP]] +; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP21]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP25:%.*]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i32 +; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP19]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP21]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP19]], float [[OLDVALUEPHI]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP20]], metadata 
!"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = shl i64 1, [[TMP18]] +; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = xor i64 [[TMP23]], -1 +; IR-ITERATIVE-NEXT: [[TMP25]] = and i64 [[ACTIVEBITS]], [[TMP24]] +; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP26]], label [[COMPUTEEND]], label [[COMPUTELOOP]] ; IR-ITERATIVE: ComputeEnd: -; IR-ITERATIVE-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP34]], label [[TMP10]], label [[TMP12]] +; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[TMP10]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp( ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8:[0-9]+]] -; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP40:%.*]] +; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]] ; IR-DPP: 2: ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -267,43 +232,36 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_one_as_scope_un ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -2147483648) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float -; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call 
float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP15]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP17]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP22]], float [[TMP23]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP25:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP24]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] -; 
IR-DPP-NEXT: [[TMP26:%.*]] = bitcast float [[TMP24]] to i32 -; IR-DPP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP26]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP28:%.*]] = bitcast i32 [[TMP27]] to float -; IR-DPP-NEXT: [[TMP29:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP28]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-DPP-NEXT: br i1 [[TMP30]], label [[TMP31:%.*]], label [[TMP33:%.*]] -; IR-DPP: 31: -; IR-DPP-NEXT: [[TMP32:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP29]] syncscope("one-as") monotonic, align 4 +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float 
[[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] +; IR-DPP: 26: +; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("one-as") monotonic, align 4 +; IR-DPP-NEXT: br label [[TMP28]] +; IR-DPP: 28: +; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] +; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP30]], float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: br label [[TMP33]] ; IR-DPP: 33: -; IR-DPP-NEXT: [[TMP34:%.*]] = phi 
float [ poison, [[TMP2]] ], [ [[TMP32]], [[TMP31]] ] -; IR-DPP-NEXT: [[TMP35:%.*]] = bitcast float [[TMP34]] to i32 -; IR-DPP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP35]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float -; IR-DPP-NEXT: [[TMP38:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP39:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP37]], float [[TMP38]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: br label [[TMP40]] -; IR-DPP: 40: -; IR-DPP-NEXT: [[TMP41:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP39]], [[TMP33]] ] -; IR-DPP-NEXT: ret float [[TMP41]] +; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP32]], [[TMP28]] ] +; IR-DPP-NEXT: ret float [[TMP34]] ; %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic ret float %result @@ -312,7 +270,7 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_one_as_scope_un define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float %val) #1 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp( ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] -; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]] +; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP16:%.*]] ; IR-ITERATIVE: 2: ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -323,44 +281,37 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_one_as_scope_un ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: -; IR-ITERATIVE-NEXT: 
[[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP29:%.*]] syncscope("one-as") monotonic, align 4 +; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP22:%.*]] syncscope("one-as") monotonic, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32 -; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP14]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float -; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP28:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: br label [[TMP18]] -; IR-ITERATIVE: 18: -; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ] -; IR-ITERATIVE-NEXT: ret float [[TMP19]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP21:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: br label [[TMP16]] +; IR-ITERATIVE: 16: +; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP15]], [[TMP12]] ] +; IR-ITERATIVE-NEXT: ret float [[TMP17]] ; IR-ITERATIVE: ComputeLoop: -; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP29]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP28]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP32:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call i64 
@llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP20]] to i32 -; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP22]], i32 [[TMP21]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float -; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = bitcast float [[ACCUMULATOR]] to i32 -; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = bitcast float [[OLDVALUEPHI]] to i32 -; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[TMP25]], i32 [[TMP21]], i32 [[TMP26]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP28]] = bitcast i32 [[TMP27]] to float -; IR-ITERATIVE-NEXT: [[TMP29]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP24]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP30:%.*]] = shl i64 1, [[TMP20]] -; IR-ITERATIVE-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], -1 -; IR-ITERATIVE-NEXT: [[TMP32]] = and i64 [[ACTIVEBITS]], [[TMP31]] -; IR-ITERATIVE-NEXT: [[TMP33:%.*]] = icmp eq i64 [[TMP32]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP33]], label [[COMPUTEEND]], label [[COMPUTELOOP]] +; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP21]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP25:%.*]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i32 +; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP19]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP21]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP19]], float [[OLDVALUEPHI]]) 
#[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = shl i64 1, [[TMP18]] +; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = xor i64 [[TMP23]], -1 +; IR-ITERATIVE-NEXT: [[TMP25]] = and i64 [[ACTIVEBITS]], [[TMP24]] +; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP26]], label [[COMPUTEEND]], label [[COMPUTELOOP]] ; IR-ITERATIVE: ComputeEnd: -; IR-ITERATIVE-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP34]], label [[TMP10]], label [[TMP12]] +; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[TMP10]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp( ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] -; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP40:%.*]] +; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]] ; IR-DPP: 2: ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -368,43 +319,36 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_one_as_scope_un ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -2147483648) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float -; IR-DPP-NEXT: [[TMP13:%.*]] = call float 
@llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP15]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP17]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP22]], float [[TMP23]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; 
IR-DPP-NEXT: [[TMP25:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP24]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP26:%.*]] = bitcast float [[TMP24]] to i32 -; IR-DPP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP26]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP28:%.*]] = bitcast i32 [[TMP27]] to float -; IR-DPP-NEXT: [[TMP29:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP28]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-DPP-NEXT: br i1 [[TMP30]], label [[TMP31:%.*]], label [[TMP33:%.*]] -; IR-DPP: 31: -; IR-DPP-NEXT: [[TMP32:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP29]] syncscope("one-as") monotonic, align 4 +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float 
[[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] +; IR-DPP: 26: +; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("one-as") monotonic, align 4 +; IR-DPP-NEXT: br label [[TMP28]] +; IR-DPP: 28: +; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] +; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP30]], 
float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: br label [[TMP33]] ; IR-DPP: 33: -; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP32]], [[TMP31]] ] -; IR-DPP-NEXT: [[TMP35:%.*]] = bitcast float [[TMP34]] to i32 -; IR-DPP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP35]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float -; IR-DPP-NEXT: [[TMP38:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP39:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP37]], float [[TMP38]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: br label [[TMP40]] -; IR-DPP: 40: -; IR-DPP-NEXT: [[TMP41:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP39]], [[TMP33]] ] -; IR-DPP-NEXT: ret float [[TMP41]] +; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP32]], [[TMP28]] ] +; IR-DPP-NEXT: ret float [[TMP34]] ; %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic ret float %result @@ -413,7 +357,7 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_one_as_scope_un define amdgpu_ps float @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 { ; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp( ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] -; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]] +; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP16:%.*]] ; IR-ITERATIVE: 2: ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -424,44 +368,37 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_uni_value_agent_scope_str ; IR-ITERATIVE-NEXT: 
[[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: -; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP29:%.*]] syncscope("agent") monotonic, align 4 +; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP22:%.*]] syncscope("agent") monotonic, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32 -; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP14]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float -; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP28:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: br label [[TMP18]] -; IR-ITERATIVE: 18: -; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ] -; IR-ITERATIVE-NEXT: ret float [[TMP19]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP21:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: br label [[TMP16]] +; IR-ITERATIVE: 16: +; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP15]], [[TMP12]] ] +; IR-ITERATIVE-NEXT: ret float [[TMP17]] ; IR-ITERATIVE: ComputeLoop: -; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP29]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP28]], [[COMPUTELOOP]] ] -; 
IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP32:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP20]] to i32 -; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP22]], i32 [[TMP21]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float -; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = bitcast float [[ACCUMULATOR]] to i32 -; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = bitcast float [[OLDVALUEPHI]] to i32 -; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[TMP25]], i32 [[TMP21]], i32 [[TMP26]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP28]] = bitcast i32 [[TMP27]] to float -; IR-ITERATIVE-NEXT: [[TMP29]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP24]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP30:%.*]] = shl i64 1, [[TMP20]] -; IR-ITERATIVE-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], -1 -; IR-ITERATIVE-NEXT: [[TMP32]] = and i64 [[ACTIVEBITS]], [[TMP31]] -; IR-ITERATIVE-NEXT: [[TMP33:%.*]] = icmp eq i64 [[TMP32]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP33]], label [[COMPUTEEND]], label [[COMPUTELOOP]] +; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP21]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP25:%.*]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i32 +; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 
[[TMP19]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP21]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP19]], float [[OLDVALUEPHI]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = shl i64 1, [[TMP18]] +; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = xor i64 [[TMP23]], -1 +; IR-ITERATIVE-NEXT: [[TMP25]] = and i64 [[ACTIVEBITS]], [[TMP24]] +; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP26]], label [[COMPUTEEND]], label [[COMPUTELOOP]] ; IR-ITERATIVE: ComputeEnd: -; IR-ITERATIVE-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP34]], label [[TMP10]], label [[TMP12]] +; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[TMP10]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp( ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] -; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP40:%.*]] +; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]] ; IR-DPP: 2: ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -469,43 +406,36 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_uni_value_agent_scope_str ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -2147483648) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP11:%.*]] = 
bitcast i32 [[TMP10]] to float -; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float -; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP15]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP17]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP24:%.*]] = call float 
@llvm.experimental.constrained.fadd.f32(float [[TMP22]], float [[TMP23]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP25:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP24]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP26:%.*]] = bitcast float [[TMP24]] to i32 -; IR-DPP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP26]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP28:%.*]] = bitcast i32 [[TMP27]] to float -; IR-DPP-NEXT: [[TMP29:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP28]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-DPP-NEXT: br i1 [[TMP30]], label [[TMP31:%.*]], label [[TMP33:%.*]] -; IR-DPP: 31: -; IR-DPP-NEXT: [[TMP32:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP29]] syncscope("agent") monotonic, align 4 +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", 
metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] +; IR-DPP: 26: +; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4 +; IR-DPP-NEXT: br label [[TMP28]] +; IR-DPP: 28: +; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] +; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP31:%.*]] = call float 
@llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP30]], float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: br label [[TMP33]] ; IR-DPP: 33: -; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP32]], [[TMP31]] ] -; IR-DPP-NEXT: [[TMP35:%.*]] = bitcast float [[TMP34]] to i32 -; IR-DPP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP35]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float -; IR-DPP-NEXT: [[TMP38:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP39:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP37]], float [[TMP38]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: br label [[TMP40]] -; IR-DPP: 40: -; IR-DPP-NEXT: [[TMP41:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP39]], [[TMP33]] ] -; IR-DPP-NEXT: ret float [[TMP41]] +; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP32]], [[TMP28]] ] +; IR-DPP-NEXT: ret float [[TMP34]] ; %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic ret float %result @@ -514,7 +444,7 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_uni_value_agent_scope_str define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 { ; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp( ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] -; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]] +; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP16:%.*]] ; IR-ITERATIVE: 2: ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = 
trunc i64 [[TMP3]] to i32 @@ -525,44 +455,37 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_str ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: -; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP29:%.*]] syncscope("agent") monotonic, align 4 +; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP22:%.*]] syncscope("agent") monotonic, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32 -; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP14]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float -; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[TMP16]], float [[TMP28:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: br label [[TMP18]] -; IR-ITERATIVE: 18: -; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ] -; IR-ITERATIVE-NEXT: ret float [[TMP19]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[TMP14]], float [[TMP21:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: br label [[TMP16]] +; IR-ITERATIVE: 16: +; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP15]], [[TMP12]] ] +; IR-ITERATIVE-NEXT: ret float [[TMP17]] ; IR-ITERATIVE: ComputeLoop: -; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ 
[[TMP29]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP28]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP32:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP20]] to i32 -; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP22]], i32 [[TMP21]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float -; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = bitcast float [[ACCUMULATOR]] to i32 -; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = bitcast float [[OLDVALUEPHI]] to i32 -; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[TMP25]], i32 [[TMP21]], i32 [[TMP26]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP28]] = bitcast i32 [[TMP27]] to float -; IR-ITERATIVE-NEXT: [[TMP29]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP24]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP30:%.*]] = shl i64 1, [[TMP20]] -; IR-ITERATIVE-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], -1 -; IR-ITERATIVE-NEXT: [[TMP32]] = and i64 [[ACTIVEBITS]], [[TMP31]] -; IR-ITERATIVE-NEXT: [[TMP33:%.*]] = icmp eq i64 [[TMP32]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP33]], label [[COMPUTEEND]], label [[COMPUTELOOP]] +; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP21]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP25:%.*]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: 
[[TMP19:%.*]] = trunc i64 [[TMP18]] to i32 +; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP19]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP21]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP19]], float [[OLDVALUEPHI]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = shl i64 1, [[TMP18]] +; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = xor i64 [[TMP23]], -1 +; IR-ITERATIVE-NEXT: [[TMP25]] = and i64 [[ACTIVEBITS]], [[TMP24]] +; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP26]], label [[COMPUTEEND]], label [[COMPUTELOOP]] ; IR-ITERATIVE: ComputeEnd: -; IR-ITERATIVE-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP34]], label [[TMP10]], label [[TMP12]] +; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[TMP10]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp( ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] -; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP40:%.*]] +; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]] ; IR-DPP: 2: ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -570,43 +493,36 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_str ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32 -; 
IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -2147483648) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float -; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP15]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP17]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float 
@llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP22]], float [[TMP23]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP25:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP24]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP26:%.*]] = bitcast float [[TMP24]] to i32 -; IR-DPP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP26]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP28:%.*]] = bitcast i32 [[TMP27]] to float -; IR-DPP-NEXT: [[TMP29:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP28]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-DPP-NEXT: br i1 [[TMP30]], label [[TMP31:%.*]], label [[TMP33:%.*]] -; IR-DPP: 31: -; IR-DPP-NEXT: [[TMP32:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP29]] syncscope("agent") monotonic, align 4 +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) 
#[[ATTR8]] +; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] +; IR-DPP: 26: +; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4 +; IR-DPP-NEXT: br label [[TMP28]] +; IR-DPP: 28: +; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] +; 
IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[TMP30]], float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: br label [[TMP33]] ; IR-DPP: 33: -; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP32]], [[TMP31]] ] -; IR-DPP-NEXT: [[TMP35:%.*]] = bitcast float [[TMP34]] to i32 -; IR-DPP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP35]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float -; IR-DPP-NEXT: [[TMP38:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP39:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[TMP37]], float [[TMP38]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: br label [[TMP40]] -; IR-DPP: 40: -; IR-DPP-NEXT: [[TMP41:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP39]], [[TMP33]] ] -; IR-DPP-NEXT: ret float [[TMP41]] +; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP32]], [[TMP28]] ] +; IR-DPP-NEXT: ret float [[TMP34]] ; %result = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic ret float %result @@ -615,7 +531,7 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_str define amdgpu_ps float @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 { ; IR-LABEL: @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe( ; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() -; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]] +; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]] ; IR: 2: ; IR-NEXT: [[TMP3:%.*]] = call i64 
@llvm.amdgcn.ballot.i64(i1 true) ; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -630,16 +546,14 @@ define amdgpu_ps float @global_atomic_fmin_uni_address_uni_value_agent_scope_uns ; IR-NEXT: br label [[TMP12]] ; IR: 12: ; IR-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] -; IR-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32 -; IR-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP14]]) -; IR-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float -; IR-NEXT: [[TMP17:%.*]] = uitofp i32 [[TMP8]] to float -; IR-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float 0x7FF8000000000000, float [[VAL]] -; IR-NEXT: [[TMP19:%.*]] = call float @llvm.minnum.f32(float [[TMP16]], float [[TMP18]]) -; IR-NEXT: br label [[TMP20]] -; IR: 20: -; IR-NEXT: [[TMP21:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP19]], [[TMP12]] ] -; IR-NEXT: ret float [[TMP21]] +; IR-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) +; IR-NEXT: [[TMP15:%.*]] = uitofp i32 [[TMP8]] to float +; IR-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], float 0x7FF8000000000000, float [[VAL]] +; IR-NEXT: [[TMP17:%.*]] = call float @llvm.minnum.f32(float [[TMP14]], float [[TMP16]]) +; IR-NEXT: br label [[TMP18]] +; IR: 18: +; IR-NEXT: [[TMP19:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ] +; IR-NEXT: ret float [[TMP19]] ; %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic ret float %result @@ -648,7 +562,7 @@ define amdgpu_ps float @global_atomic_fmin_uni_address_uni_value_agent_scope_uns define amdgpu_ps float @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) #0 { ; IR-ITERATIVE-LABEL: @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe( ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() -; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]] +; IR-ITERATIVE-NEXT: br i1 
[[TMP1]], label [[TMP2:%.*]], label [[TMP16:%.*]] ; IR-ITERATIVE: 2: ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -659,44 +573,37 @@ define amdgpu_ps float @global_atomic_fmin_uni_address_div_value_agent_scope_uns ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: -; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[TMP29:%.*]] syncscope("agent") monotonic, align 4 +; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[TMP22:%.*]] syncscope("agent") monotonic, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32 -; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP14]]) -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float -; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call float @llvm.minnum.f32(float [[TMP16]], float [[TMP28:%.*]]) -; IR-ITERATIVE-NEXT: br label [[TMP18]] -; IR-ITERATIVE: 18: -; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ] -; IR-ITERATIVE-NEXT: ret float [[TMP19]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) +; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.minnum.f32(float [[TMP14]], float [[TMP21:%.*]]) +; IR-ITERATIVE-NEXT: br label [[TMP16]] +; IR-ITERATIVE: 16: +; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP15]], [[TMP12]] ] +; IR-ITERATIVE-NEXT: ret float [[TMP17]] ; IR-ITERATIVE: ComputeLoop: -; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP29]], 
[[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP28]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP32:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP20]] to i32 -; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP22]], i32 [[TMP21]]) -; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float -; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = bitcast float [[ACCUMULATOR]] to i32 -; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = bitcast float [[OLDVALUEPHI]] to i32 -; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[TMP25]], i32 [[TMP21]], i32 [[TMP26]]) -; IR-ITERATIVE-NEXT: [[TMP28]] = bitcast i32 [[TMP27]] to float -; IR-ITERATIVE-NEXT: [[TMP29]] = call float @llvm.minnum.f32(float [[ACCUMULATOR]], float [[TMP24]]) -; IR-ITERATIVE-NEXT: [[TMP30:%.*]] = shl i64 1, [[TMP20]] -; IR-ITERATIVE-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], -1 -; IR-ITERATIVE-NEXT: [[TMP32]] = and i64 [[ACTIVEBITS]], [[TMP31]] -; IR-ITERATIVE-NEXT: [[TMP33:%.*]] = icmp eq i64 [[TMP32]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP33]], label [[COMPUTEEND]], label [[COMPUTELOOP]] +; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP21]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP25:%.*]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i32 +; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 
[[TMP19]]) +; IR-ITERATIVE-NEXT: [[TMP21]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP19]], float [[OLDVALUEPHI]]) +; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.minnum.f32(float [[ACCUMULATOR]], float [[TMP20]]) +; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = shl i64 1, [[TMP18]] +; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = xor i64 [[TMP23]], -1 +; IR-ITERATIVE-NEXT: [[TMP25]] = and i64 [[ACTIVEBITS]], [[TMP24]] +; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP26]], label [[COMPUTEEND]], label [[COMPUTELOOP]] ; IR-ITERATIVE: ComputeEnd: -; IR-ITERATIVE-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP34]], label [[TMP10]], label [[TMP12]] +; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[TMP10]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe( ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() -; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP40:%.*]] +; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]] ; IR-DPP: 2: ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -704,43 +611,36 @@ define amdgpu_ps float @global_atomic_fmin_uni_address_div_value_agent_scope_uns ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) -; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 2143289344) -; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float -; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 
0x7FF8000000000000, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.minnum.f32(float [[TMP11]], float [[TMP13]]) -; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.minnum.f32(float [[TMP14]], float [[TMP15]]) -; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.minnum.f32(float [[TMP16]], float [[TMP17]]) -; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.minnum.f32(float [[TMP18]], float [[TMP19]]) -; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.minnum.f32(float [[TMP20]], float [[TMP21]]) -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.minnum.f32(float [[TMP22]], float [[TMP23]]) -; IR-DPP-NEXT: [[TMP25:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP24]], i32 312, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP26:%.*]] = bitcast float [[TMP24]] to i32 -; IR-DPP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP26]], i32 63) -; IR-DPP-NEXT: [[TMP28:%.*]] = bitcast i32 [[TMP27]] to float -; IR-DPP-NEXT: [[TMP29:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP28]]) -; IR-DPP-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-DPP-NEXT: br i1 [[TMP30]], label [[TMP31:%.*]], label [[TMP33:%.*]] -; IR-DPP: 31: -; IR-DPP-NEXT: 
[[TMP32:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[TMP29]] syncscope("agent") monotonic, align 4 +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float 0x7FF8000000000000) +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.minnum.f32(float [[TMP9]], float [[TMP10]]) +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.minnum.f32(float [[TMP11]], float [[TMP12]]) +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.minnum.f32(float [[TMP13]], float [[TMP14]]) +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.minnum.f32(float [[TMP15]], float [[TMP16]]) +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.minnum.f32(float [[TMP17]], float [[TMP18]]) +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.minnum.f32(float [[TMP19]], float [[TMP20]]) +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP24:%.*]] = call float 
@llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) +; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] +; IR-DPP: 26: +; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4 +; IR-DPP-NEXT: br label [[TMP28]] +; IR-DPP: 28: +; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] +; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) +; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) +; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.minnum.f32(float [[TMP30]], float [[TMP31]]) ; IR-DPP-NEXT: br label [[TMP33]] ; IR-DPP: 33: -; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP32]], [[TMP31]] ] -; IR-DPP-NEXT: [[TMP35:%.*]] = bitcast float [[TMP34]] to i32 -; IR-DPP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP35]]) -; IR-DPP-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float -; IR-DPP-NEXT: [[TMP38:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]]) -; IR-DPP-NEXT: [[TMP39:%.*]] = call float @llvm.minnum.f32(float [[TMP37]], float [[TMP38]]) -; IR-DPP-NEXT: br label [[TMP40]] -; IR-DPP: 40: -; IR-DPP-NEXT: [[TMP41:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP39]], [[TMP33]] ] -; IR-DPP-NEXT: ret float [[TMP41]] +; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP32]], [[TMP28]] ] +; IR-DPP-NEXT: ret float [[TMP34]] ; %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic ret float %result @@ -749,7 +649,7 @@ define amdgpu_ps float @global_atomic_fmin_uni_address_div_value_agent_scope_uns define amdgpu_ps float @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1{ ; IR-ITERATIVE-LABEL: 
@global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp( ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] -; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]] +; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]] ; IR-ITERATIVE: 2: ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -764,20 +664,18 @@ define amdgpu_ps float @global_atomic_fmax_uni_address_uni_value_agent_scope_uns ; IR-ITERATIVE-NEXT: br label [[TMP12]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32 -; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP14]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float -; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float 0x7FF8000000000000, float [[VAL]] -; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP16]], float [[TMP18]], metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: br label [[TMP20]] -; IR-ITERATIVE: 20: -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP19]], [[TMP12]] ] -; IR-ITERATIVE-NEXT: ret float [[TMP21]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], float 0x7FF8000000000000, float [[VAL]] 
+; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: br label [[TMP18]] +; IR-ITERATIVE: 18: +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ] +; IR-ITERATIVE-NEXT: ret float [[TMP19]] ; ; IR-DPP-LABEL: @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp( ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] -; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]] +; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]] ; IR-DPP: 2: ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -792,16 +690,14 @@ define amdgpu_ps float @global_atomic_fmax_uni_address_uni_value_agent_scope_uns ; IR-DPP-NEXT: br label [[TMP12]] ; IR-DPP: 12: ; IR-DPP-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] -; IR-DPP-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32 -; IR-DPP-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP14]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float -; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float 0x7FF8000000000000, float [[VAL]] -; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP16]], float [[TMP18]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: br label [[TMP20]] -; IR-DPP: 20: -; IR-DPP-NEXT: [[TMP21:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP19]], [[TMP12]] ] -; IR-DPP-NEXT: ret float [[TMP21]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR8]] +; 
IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], float 0x7FF8000000000000, float [[VAL]] +; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: br label [[TMP18]] +; IR-DPP: 18: +; IR-DPP-NEXT: [[TMP19:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ] +; IR-DPP-NEXT: ret float [[TMP19]] ; %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic ret float %result @@ -810,7 +706,7 @@ define amdgpu_ps float @global_atomic_fmax_uni_address_uni_value_agent_scope_uns define amdgpu_ps float @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float %val) #1{ ; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp( ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] -; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]] +; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP16:%.*]] ; IR-ITERATIVE: 2: ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -821,44 +717,37 @@ define amdgpu_ps float @global_atomic_fmax_uni_address_div_value_agent_scope_uns ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: -; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP29:%.*]] syncscope("agent") monotonic, align 4 +; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP22:%.*]] syncscope("agent") monotonic, align 4 ; 
IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32 -; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP14]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float -; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP16]], float [[TMP28:%.*]], metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: br label [[TMP18]] -; IR-ITERATIVE: 18: -; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ] -; IR-ITERATIVE-NEXT: ret float [[TMP19]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP21:%.*]], metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: br label [[TMP16]] +; IR-ITERATIVE: 16: +; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP15]], [[TMP12]] ] +; IR-ITERATIVE-NEXT: ret float [[TMP17]] ; IR-ITERATIVE: ComputeLoop: -; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP29]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP28]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP32:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP20]] to i32 -; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP22]], i32 [[TMP21]]) #[[ATTR7]] -; 
IR-ITERATIVE-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float -; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = bitcast float [[ACCUMULATOR]] to i32 -; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = bitcast float [[OLDVALUEPHI]] to i32 -; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[TMP25]], i32 [[TMP21]], i32 [[TMP26]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP28]] = bitcast i32 [[TMP27]] to float -; IR-ITERATIVE-NEXT: [[TMP29]] = call float @llvm.experimental.constrained.maxnum.f32(float [[ACCUMULATOR]], float [[TMP24]], metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP30:%.*]] = shl i64 1, [[TMP20]] -; IR-ITERATIVE-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], -1 -; IR-ITERATIVE-NEXT: [[TMP32]] = and i64 [[ACTIVEBITS]], [[TMP31]] -; IR-ITERATIVE-NEXT: [[TMP33:%.*]] = icmp eq i64 [[TMP32]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP33]], label [[COMPUTEEND]], label [[COMPUTELOOP]] +; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP21]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP25:%.*]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i32 +; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP19]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP21]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP19]], float [[OLDVALUEPHI]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.experimental.constrained.maxnum.f32(float [[ACCUMULATOR]], float [[TMP20]], metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = shl i64 1, [[TMP18]] +; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = xor i64 [[TMP23]], -1 +; IR-ITERATIVE-NEXT: [[TMP25]] = 
and i64 [[ACTIVEBITS]], [[TMP24]] +; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP26]], label [[COMPUTEEND]], label [[COMPUTELOOP]] ; IR-ITERATIVE: ComputeEnd: -; IR-ITERATIVE-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP34]], label [[TMP10]], label [[TMP12]] +; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[TMP10]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp( ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] -; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP40:%.*]] +; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]] ; IR-DPP: 2: ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -866,43 +755,36 @@ define amdgpu_ps float @global_atomic_fmax_uni_address_div_value_agent_scope_uns ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 2143289344) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float -; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP11]], float [[TMP13]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 
0x7FF8000000000000, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP15]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP16]], float [[TMP17]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP18]], float [[TMP19]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP20]], float [[TMP21]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP22]], float [[TMP23]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP25:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP24]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP26:%.*]] = bitcast float [[TMP24]] to i32 -; IR-DPP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP26]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP28:%.*]] = bitcast i32 [[TMP27]] to float -; IR-DPP-NEXT: [[TMP29:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP28]]) 
#[[ATTR8]] -; IR-DPP-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-DPP-NEXT: br i1 [[TMP30]], label [[TMP31:%.*]], label [[TMP33:%.*]] -; IR-DPP: 31: -; IR-DPP-NEXT: [[TMP32:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP29]] syncscope("agent") monotonic, align 4 +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float 0x7FF8000000000000) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP9]], float [[TMP10]], metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP11]], float [[TMP12]], metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP13]], float [[TMP14]], metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP15]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP17]], float [[TMP18]], metadata !"fpexcept.strict") 
#[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP19]], float [[TMP20]], metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] +; IR-DPP: 26: +; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4 +; IR-DPP-NEXT: br label [[TMP28]] +; IR-DPP: 28: +; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] +; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP30]], float [[TMP31]], metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: br label [[TMP33]] ; IR-DPP: 33: -; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP32]], [[TMP31]] ] -; IR-DPP-NEXT: [[TMP35:%.*]] = bitcast float [[TMP34]] to i32 -; IR-DPP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP35]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float -; IR-DPP-NEXT: [[TMP38:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP39:%.*]] = call float 
@llvm.experimental.constrained.maxnum.f32(float [[TMP37]], float [[TMP38]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: br label [[TMP40]] -; IR-DPP: 40: -; IR-DPP-NEXT: [[TMP41:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP39]], [[TMP33]] ] -; IR-DPP-NEXT: ret float [[TMP41]] +; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP32]], [[TMP28]] ] +; IR-DPP-NEXT: ret float [[TMP34]] ; %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic ret float %result @@ -911,7 +793,7 @@ define amdgpu_ps float @global_atomic_fmax_uni_address_div_value_agent_scope_uns define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp( ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] -; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]] +; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP16:%.*]] ; IR-ITERATIVE: 2: ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -922,44 +804,37 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_system_scope_st ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: -; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP29:%.*]] monotonic, align 4 +; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP22:%.*]] monotonic, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float 
[[TMP13]] to i32 -; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP14]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float -; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP28:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: br label [[TMP18]] -; IR-ITERATIVE: 18: -; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ] -; IR-ITERATIVE-NEXT: ret float [[TMP19]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP21:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: br label [[TMP16]] +; IR-ITERATIVE: 16: +; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP15]], [[TMP12]] ] +; IR-ITERATIVE-NEXT: ret float [[TMP17]] ; IR-ITERATIVE: ComputeLoop: -; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP29]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP28]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP32:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP20]] to i32 -; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP22]], i32 [[TMP21]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float -; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = bitcast float [[ACCUMULATOR]] to i32 -; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = bitcast float 
[[OLDVALUEPHI]] to i32 -; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[TMP25]], i32 [[TMP21]], i32 [[TMP26]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP28]] = bitcast i32 [[TMP27]] to float -; IR-ITERATIVE-NEXT: [[TMP29]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP24]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP30:%.*]] = shl i64 1, [[TMP20]] -; IR-ITERATIVE-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], -1 -; IR-ITERATIVE-NEXT: [[TMP32]] = and i64 [[ACTIVEBITS]], [[TMP31]] -; IR-ITERATIVE-NEXT: [[TMP33:%.*]] = icmp eq i64 [[TMP32]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP33]], label [[COMPUTEEND]], label [[COMPUTELOOP]] +; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP21]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP25:%.*]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i32 +; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP19]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP21]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP19]], float [[OLDVALUEPHI]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = shl i64 1, [[TMP18]] +; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = xor i64 [[TMP23]], -1 +; IR-ITERATIVE-NEXT: [[TMP25]] = and i64 [[ACTIVEBITS]], [[TMP24]] +; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP26]], label 
[[COMPUTEEND]], label [[COMPUTELOOP]] ; IR-ITERATIVE: ComputeEnd: -; IR-ITERATIVE-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP34]], label [[TMP10]], label [[TMP12]] +; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[TMP10]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp( ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] -; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP40:%.*]] +; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]] ; IR-DPP: 2: ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -967,43 +842,36 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_system_scope_st ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -2147483648) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float -; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call float 
@llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP15]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP17]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP22]], float [[TMP23]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP25:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP24]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP26:%.*]] = bitcast float [[TMP24]] to i32 -; IR-DPP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP26]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP28:%.*]] = bitcast i32 [[TMP27]] to float -; IR-DPP-NEXT: [[TMP29:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP28]]) #[[ATTR8]] -; IR-DPP-NEXT: 
[[TMP30:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-DPP-NEXT: br i1 [[TMP30]], label [[TMP31:%.*]], label [[TMP33:%.*]] -; IR-DPP: 31: -; IR-DPP-NEXT: [[TMP32:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP29]] monotonic, align 4 +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata 
!"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] +; IR-DPP: 26: +; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] monotonic, align 4 +; IR-DPP-NEXT: br label [[TMP28]] +; IR-DPP: 28: +; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] +; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP30]], float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: br label [[TMP33]] ; IR-DPP: 33: -; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP32]], [[TMP31]] ] -; IR-DPP-NEXT: [[TMP35:%.*]] = bitcast float [[TMP34]] to i32 -; IR-DPP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP35]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float -; IR-DPP-NEXT: [[TMP38:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]]) #[[ATTR8]] -; 
IR-DPP-NEXT: [[TMP39:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP37]], float [[TMP38]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: br label [[TMP40]] -; IR-DPP: 40: -; IR-DPP-NEXT: [[TMP41:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP39]], [[TMP33]] ] -; IR-DPP-NEXT: ret float [[TMP41]] +; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP32]], [[TMP28]] ] +; IR-DPP-NEXT: ret float [[TMP34]] ; %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4 ret float %result @@ -1012,7 +880,7 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_system_scope_st define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp( ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] -; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]] +; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP16:%.*]] ; IR-ITERATIVE: 2: ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -1023,44 +891,37 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_system_scope_st ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: -; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP29:%.*]] monotonic, align 4 +; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP22:%.*]] monotonic, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] -; 
IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32 -; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP14]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float -; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP28:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: br label [[TMP18]] -; IR-ITERATIVE: 18: -; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ] -; IR-ITERATIVE-NEXT: ret float [[TMP19]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP21:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: br label [[TMP16]] +; IR-ITERATIVE: 16: +; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP15]], [[TMP12]] ] +; IR-ITERATIVE-NEXT: ret float [[TMP17]] ; IR-ITERATIVE: ComputeLoop: -; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP29]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP28]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP32:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP20]] to i32 -; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP22]], i32 [[TMP21]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float -; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = bitcast float [[ACCUMULATOR]] to i32 -; 
IR-ITERATIVE-NEXT: [[TMP26:%.*]] = bitcast float [[OLDVALUEPHI]] to i32 -; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[TMP25]], i32 [[TMP21]], i32 [[TMP26]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP28]] = bitcast i32 [[TMP27]] to float -; IR-ITERATIVE-NEXT: [[TMP29]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP24]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP30:%.*]] = shl i64 1, [[TMP20]] -; IR-ITERATIVE-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], -1 -; IR-ITERATIVE-NEXT: [[TMP32]] = and i64 [[ACTIVEBITS]], [[TMP31]] -; IR-ITERATIVE-NEXT: [[TMP33:%.*]] = icmp eq i64 [[TMP32]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP33]], label [[COMPUTEEND]], label [[COMPUTELOOP]] +; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP21]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP25:%.*]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i32 +; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP19]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP21]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP19]], float [[OLDVALUEPHI]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = shl i64 1, [[TMP18]] +; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = xor i64 [[TMP23]], -1 +; IR-ITERATIVE-NEXT: [[TMP25]] = and i64 [[ACTIVEBITS]], [[TMP24]] +; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 0 +; 
IR-ITERATIVE-NEXT: br i1 [[TMP26]], label [[COMPUTEEND]], label [[COMPUTELOOP]] ; IR-ITERATIVE: ComputeEnd: -; IR-ITERATIVE-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP34]], label [[TMP10]], label [[TMP12]] +; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[TMP10]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp( ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] -; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP40:%.*]] +; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]] ; IR-DPP: 2: ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -1068,43 +929,36 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_system_scope_st ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -2147483648) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float -; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: 
[[TMP16:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP15]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP17]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP22]], float [[TMP23]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP25:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP24]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP26:%.*]] = bitcast float [[TMP24]] to i32 -; IR-DPP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP26]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP28:%.*]] = bitcast i32 [[TMP27]] to float -; IR-DPP-NEXT: [[TMP29:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP28]]) 
#[[ATTR8]] -; IR-DPP-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-DPP-NEXT: br i1 [[TMP30]], label [[TMP31:%.*]], label [[TMP33:%.*]] -; IR-DPP: 31: -; IR-DPP-NEXT: [[TMP32:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP29]] monotonic, align 4 +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], 
float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] +; IR-DPP: 26: +; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] monotonic, align 4 +; IR-DPP-NEXT: br label [[TMP28]] +; IR-DPP: 28: +; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] +; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP30]], float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: br label [[TMP33]] ; IR-DPP: 33: -; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP32]], [[TMP31]] ] -; IR-DPP-NEXT: [[TMP35:%.*]] = bitcast float [[TMP34]] to i32 -; IR-DPP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP35]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float -; IR-DPP-NEXT: [[TMP38:%.*]] = call float 
@llvm.amdgcn.strict.wwm.f32(float [[TMP25]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP39:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP37]], float [[TMP38]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: br label [[TMP40]] -; IR-DPP: 40: -; IR-DPP-NEXT: [[TMP41:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP39]], [[TMP33]] ] -; IR-DPP-NEXT: ret float [[TMP41]] +; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP32]], [[TMP28]] ] +; IR-DPP-NEXT: ret float [[TMP34]] ; %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4 ret float %result @@ -1275,7 +1129,7 @@ define amdgpu_ps double @global_atomic_fsub_double_uni_address_div_value_agent_s define amdgpu_ps double @global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, double inreg %val) #0 { ; IR-LABEL: @global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe( ; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() -; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP26:%.*]] +; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]] ; IR: 2: ; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -1290,22 +1144,14 @@ define amdgpu_ps double @global_atomic_fmin_double_uni_address_uni_value_agent_s ; IR-NEXT: br label [[TMP12]] ; IR: 12: ; IR-NEXT: [[TMP13:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] -; IR-NEXT: [[TMP14:%.*]] = bitcast double [[TMP13]] to i64 -; IR-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 -; IR-NEXT: [[TMP16:%.*]] = lshr i64 [[TMP14]], 32 -; IR-NEXT: [[TMP17:%.*]] = trunc i64 [[TMP16]] to i32 -; IR-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP15]]) -; IR-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP17]]) -; IR-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0 -; 
IR-NEXT: [[TMP21:%.*]] = insertelement <2 x i32> [[TMP20]], i32 [[TMP19]], i32 1 -; IR-NEXT: [[TMP22:%.*]] = bitcast <2 x i32> [[TMP21]] to double -; IR-NEXT: [[TMP23:%.*]] = uitofp i32 [[TMP8]] to double -; IR-NEXT: [[TMP24:%.*]] = select i1 [[TMP9]], double 0x7FF8000000000000, double [[VAL]] -; IR-NEXT: [[TMP25:%.*]] = call double @llvm.minnum.f64(double [[TMP22]], double [[TMP24]]) -; IR-NEXT: br label [[TMP26]] -; IR: 26: -; IR-NEXT: [[TMP27:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP25]], [[TMP12]] ] -; IR-NEXT: ret double [[TMP27]] +; IR-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) +; IR-NEXT: [[TMP15:%.*]] = uitofp i32 [[TMP8]] to double +; IR-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], double 0x7FF8000000000000, double [[VAL]] +; IR-NEXT: [[TMP17:%.*]] = call double @llvm.minnum.f64(double [[TMP14]], double [[TMP16]]) +; IR-NEXT: br label [[TMP18]] +; IR: 18: +; IR-NEXT: [[TMP19:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ] +; IR-NEXT: ret double [[TMP19]] ; %result = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic ret double %result @@ -1323,7 +1169,7 @@ define amdgpu_ps double @global_atomic_fmin_double_uni_address_div_value_agent_s define amdgpu_ps double @global_atomic__fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #1{ ; IR-ITERATIVE-LABEL: @global_atomic__fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp( ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] -; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP26:%.*]] +; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]] ; IR-ITERATIVE: 2: ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -1338,26 +1184,18 @@ define amdgpu_ps double 
@global_atomic__fmax_double_uni_address_uni_value_agent_ ; IR-ITERATIVE-NEXT: br label [[TMP12]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast double [[TMP13]] to i64 -; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = lshr i64 [[TMP14]], 32 -; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = trunc i64 [[TMP16]] to i32 -; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP15]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP17]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0 -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = insertelement <2 x i32> [[TMP20]], i32 [[TMP19]], i32 1 -; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = bitcast <2 x i32> [[TMP21]] to double -; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = select i1 [[TMP9]], double 0x7FF8000000000000, double [[VAL]] -; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP22]], double [[TMP24]], metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: br label [[TMP26]] -; IR-ITERATIVE: 26: -; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP25]], [[TMP12]] ] -; IR-ITERATIVE-NEXT: ret double [[TMP27]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], double 0x7FF8000000000000, double [[VAL]] +; 
IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP14]], double [[TMP16]], metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: br label [[TMP18]] +; IR-ITERATIVE: 18: +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ] +; IR-ITERATIVE-NEXT: ret double [[TMP19]] ; ; IR-DPP-LABEL: @global_atomic__fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp( ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] -; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP26:%.*]] +; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]] ; IR-DPP: 2: ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -1372,22 +1210,14 @@ define amdgpu_ps double @global_atomic__fmax_double_uni_address_uni_value_agent_ ; IR-DPP-NEXT: br label [[TMP12]] ; IR-DPP: 12: ; IR-DPP-NEXT: [[TMP13:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] -; IR-DPP-NEXT: [[TMP14:%.*]] = bitcast double [[TMP13]] to i64 -; IR-DPP-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 -; IR-DPP-NEXT: [[TMP16:%.*]] = lshr i64 [[TMP14]], 32 -; IR-DPP-NEXT: [[TMP17:%.*]] = trunc i64 [[TMP16]] to i32 -; IR-DPP-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP15]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP17]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0 -; IR-DPP-NEXT: [[TMP21:%.*]] = insertelement <2 x i32> [[TMP20]], i32 [[TMP19]], i32 1 -; IR-DPP-NEXT: [[TMP22:%.*]] = bitcast <2 x i32> [[TMP21]] to double -; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP24:%.*]] = select i1 [[TMP9]], double 
0x7FF8000000000000, double [[VAL]] -; IR-DPP-NEXT: [[TMP25:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP22]], double [[TMP24]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: br label [[TMP26]] -; IR-DPP: 26: -; IR-DPP-NEXT: [[TMP27:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP25]], [[TMP12]] ] -; IR-DPP-NEXT: ret double [[TMP27]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], double 0x7FF8000000000000, double [[VAL]] +; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP14]], double [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: br label [[TMP18]] +; IR-DPP: 18: +; IR-DPP-NEXT: [[TMP19:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ] +; IR-DPP-NEXT: ret double [[TMP19]] ; %result = atomicrmw fmax ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic ret double %result diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan_fp.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan_fp.ll index 38823681d1bb56..d1e50bd560cb23 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan_fp.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan_fp.ll @@ -59,27 +59,25 @@ define amdgpu_kernel void @global_atomic_fadd_div_value(ptr addrspace(1) %ptr) # ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 8: -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP16:%.*]] seq_cst, align 4 +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP14:%.*]] seq_cst, align 
4 ; IR-ITERATIVE-NEXT: br label [[TMP10:%.*]] ; IR-ITERATIVE: 10: ; IR-ITERATIVE-NEXT: ret void ; IR-ITERATIVE: ComputeLoop: -; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP0:%.*]] ], [ [[TMP16]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP7]], [[TMP0]] ], [ [[TMP19:%.*]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP0:%.*]] ], [ [[TMP14]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP7]], [[TMP0]] ], [ [[TMP17:%.*]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP11]] to i32 -; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = bitcast float [[DIVVALUE]] to i32 -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP13]], i32 [[TMP12]]) -; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP14]] to float -; IR-ITERATIVE-NEXT: [[TMP16]] = fadd float [[ACCUMULATOR]], [[TMP15]] -; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = shl i64 1, [[TMP11]] -; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = xor i64 [[TMP17]], -1 -; IR-ITERATIVE-NEXT: [[TMP19]] = and i64 [[ACTIVEBITS]], [[TMP18]] -; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP19]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP20]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]] +; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[DIVVALUE]], i32 [[TMP12]]) +; IR-ITERATIVE-NEXT: [[TMP14]] = fadd float [[ACCUMULATOR]], [[TMP13]] +; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = shl i64 1, [[TMP11]] +; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = xor i64 [[TMP15]], -1 +; IR-ITERATIVE-NEXT: [[TMP17]] = and i64 [[ACTIVEBITS]], [[TMP16]] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP18]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]] ; IR-ITERATIVE: ComputeEnd: -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP6]], 0 -; 
IR-ITERATIVE-NEXT: br i1 [[TMP21]], label [[TMP8:%.*]], label [[TMP10]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP6]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP19]], label [[TMP8:%.*]], label [[TMP10]] ; ; IR-DPP-LABEL: @global_atomic_fadd_div_value( ; IR-DPP-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() @@ -90,32 +88,27 @@ define amdgpu_kernel void @global_atomic_fadd_div_value(ptr addrspace(1) %ptr) # ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0) ; IR-DPP-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]]) -; IR-DPP-NEXT: [[TMP7:%.*]] = bitcast float [[DIVVALUE]] to i32 -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP7]], i32 -2147483648) -; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP8]] to float -; IR-DPP-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP7]] to float -; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP12:%.*]] = fadd float [[TMP9]], [[TMP11]] -; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP12]], i32 274, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP14:%.*]] = fadd float [[TMP12]], [[TMP13]] -; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 276, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP16:%.*]] = fadd float [[TMP14]], [[TMP15]] -; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 280, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP18:%.*]] = fadd float [[TMP16]], [[TMP17]] -; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 322, i32 10, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP20:%.*]] = fadd float [[TMP18]], [[TMP19]] -; IR-DPP-NEXT: 
[[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 323, i32 12, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP22:%.*]] = fadd float [[TMP20]], [[TMP21]] -; IR-DPP-NEXT: [[TMP23:%.*]] = bitcast float [[TMP22]] to i32 -; IR-DPP-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP23]], i32 63) -; IR-DPP-NEXT: [[TMP25:%.*]] = bitcast i32 [[TMP24]] to float -; IR-DPP-NEXT: [[TMP26:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]]) -; IR-DPP-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP6]], 0 -; IR-DPP-NEXT: br i1 [[TMP27]], label [[TMP28:%.*]], label [[TMP30:%.*]] -; IR-DPP: 28: -; IR-DPP-NEXT: [[TMP29:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP26]] seq_cst, align 4 -; IR-DPP-NEXT: br label [[TMP30]] -; IR-DPP: 30: +; IR-DPP-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[DIVVALUE]], float -0.000000e+00) +; IR-DPP-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP7]], i32 273, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP9:%.*]] = fadd float [[TMP7]], [[TMP8]] +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 274, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP11:%.*]] = fadd float [[TMP9]], [[TMP10]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 276, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP13:%.*]] = fadd float [[TMP11]], [[TMP12]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 280, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP15:%.*]] = fadd float [[TMP13]], [[TMP14]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 322, i32 10, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float 
@llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 323, i32 12, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP19:%.*]] = fadd float [[TMP17]], [[TMP18]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP19]], i32 63) +; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP20]]) +; IR-DPP-NEXT: [[TMP22:%.*]] = icmp eq i32 [[TMP6]], 0 +; IR-DPP-NEXT: br i1 [[TMP22]], label [[TMP23:%.*]], label [[TMP25:%.*]] +; IR-DPP: 23: +; IR-DPP-NEXT: [[TMP24:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP21]] seq_cst, align 4 +; IR-DPP-NEXT: br label [[TMP25]] +; IR-DPP: 25: ; IR-DPP-NEXT: ret void ; %id.x = call i32 @llvm.amdgcn.workitem.id.x() @@ -181,27 +174,25 @@ define amdgpu_kernel void @global_atomic_fsub_div_value(ptr addrspace(1) %ptr) # ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 8: -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP16:%.*]] seq_cst, align 4 +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP14:%.*]] seq_cst, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP10:%.*]] ; IR-ITERATIVE: 10: ; IR-ITERATIVE-NEXT: ret void ; IR-ITERATIVE: ComputeLoop: -; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP0:%.*]] ], [ [[TMP16]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP7]], [[TMP0]] ], [ [[TMP19:%.*]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP0:%.*]] ], [ [[TMP14]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP7]], [[TMP0]] ], [ [[TMP17:%.*]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP11]] to i32 -; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = bitcast float 
[[DIVVALUE]] to i32 -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP13]], i32 [[TMP12]]) -; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP14]] to float -; IR-ITERATIVE-NEXT: [[TMP16]] = fadd float [[ACCUMULATOR]], [[TMP15]] -; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = shl i64 1, [[TMP11]] -; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = xor i64 [[TMP17]], -1 -; IR-ITERATIVE-NEXT: [[TMP19]] = and i64 [[ACTIVEBITS]], [[TMP18]] -; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP19]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP20]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]] +; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[DIVVALUE]], i32 [[TMP12]]) +; IR-ITERATIVE-NEXT: [[TMP14]] = fadd float [[ACCUMULATOR]], [[TMP13]] +; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = shl i64 1, [[TMP11]] +; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = xor i64 [[TMP15]], -1 +; IR-ITERATIVE-NEXT: [[TMP17]] = and i64 [[ACTIVEBITS]], [[TMP16]] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP18]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]] ; IR-ITERATIVE: ComputeEnd: -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP6]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP21]], label [[TMP8:%.*]], label [[TMP10]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP6]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP19]], label [[TMP8:%.*]], label [[TMP10]] ; ; IR-DPP-LABEL: @global_atomic_fsub_div_value( ; IR-DPP-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() @@ -212,32 +203,27 @@ define amdgpu_kernel void @global_atomic_fsub_div_value(ptr addrspace(1) %ptr) # ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0) ; IR-DPP-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]]) -; IR-DPP-NEXT: [[TMP7:%.*]] = bitcast float [[DIVVALUE]] to i32 -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 
@llvm.amdgcn.set.inactive.i32(i32 [[TMP7]], i32 -2147483648) -; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP8]] to float -; IR-DPP-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP7]] to float -; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP12:%.*]] = fadd float [[TMP9]], [[TMP11]] -; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP12]], i32 274, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP14:%.*]] = fadd float [[TMP12]], [[TMP13]] -; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 276, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP16:%.*]] = fadd float [[TMP14]], [[TMP15]] -; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 280, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP18:%.*]] = fadd float [[TMP16]], [[TMP17]] -; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 322, i32 10, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP20:%.*]] = fadd float [[TMP18]], [[TMP19]] -; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 323, i32 12, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP22:%.*]] = fadd float [[TMP20]], [[TMP21]] -; IR-DPP-NEXT: [[TMP23:%.*]] = bitcast float [[TMP22]] to i32 -; IR-DPP-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP23]], i32 63) -; IR-DPP-NEXT: [[TMP25:%.*]] = bitcast i32 [[TMP24]] to float -; IR-DPP-NEXT: [[TMP26:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]]) -; IR-DPP-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP6]], 0 -; IR-DPP-NEXT: br i1 [[TMP27]], label [[TMP28:%.*]], label [[TMP30:%.*]] -; IR-DPP: 28: -; IR-DPP-NEXT: [[TMP29:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP26]] seq_cst, align 4 -; 
IR-DPP-NEXT: br label [[TMP30]] -; IR-DPP: 30: +; IR-DPP-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[DIVVALUE]], float -0.000000e+00) +; IR-DPP-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP7]], i32 273, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP9:%.*]] = fadd float [[TMP7]], [[TMP8]] +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 274, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP11:%.*]] = fadd float [[TMP9]], [[TMP10]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 276, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP13:%.*]] = fadd float [[TMP11]], [[TMP12]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 280, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP15:%.*]] = fadd float [[TMP13]], [[TMP14]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 322, i32 10, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 323, i32 12, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP19:%.*]] = fadd float [[TMP17]], [[TMP18]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP19]], i32 63) +; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP20]]) +; IR-DPP-NEXT: [[TMP22:%.*]] = icmp eq i32 [[TMP6]], 0 +; IR-DPP-NEXT: br i1 [[TMP22]], label [[TMP23:%.*]], label [[TMP25:%.*]] +; IR-DPP: 23: +; IR-DPP-NEXT: [[TMP24:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP21]] seq_cst, align 4 +; IR-DPP-NEXT: br label [[TMP25]] +; IR-DPP: 25: ; IR-DPP-NEXT: ret void ; %id.x = call i32 @llvm.amdgcn.workitem.id.x() diff --git 
a/llvm/test/CodeGen/AMDGPU/global_atomics_optimizer_fp_no_rtn.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_optimizer_fp_no_rtn.ll index 3cf2a85b1f0086..dfc831cb5050a2 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_optimizer_fp_no_rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_optimizer_fp_no_rtn.ll @@ -49,33 +49,31 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_scope_agent_scop ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: -; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP19:%.*]] syncscope("agent") monotonic, align 4 +; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP17:%.*]] syncscope("agent") monotonic, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: br label [[TMP13]] ; IR-ITERATIVE: 13: ; IR-ITERATIVE-NEXT: ret void ; IR-ITERATIVE: ComputeLoop: -; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP19]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP22:%.*]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP17]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP20:%.*]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP16]], i32 [[TMP15]]) -; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = bitcast i32 [[TMP17]] to float -; IR-ITERATIVE-NEXT: [[TMP19]] = fadd float [[ACCUMULATOR]], [[TMP18]] -; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = shl i64 1, 
[[TMP14]] -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = xor i64 [[TMP20]], -1 -; IR-ITERATIVE-NEXT: [[TMP22]] = and i64 [[ACTIVEBITS]], [[TMP21]] -; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = icmp eq i64 [[TMP22]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP23]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]] +; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP15]]) +; IR-ITERATIVE-NEXT: [[TMP17]] = fadd float [[ACCUMULATOR]], [[TMP16]] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = shl i64 1, [[TMP14]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], -1 +; IR-ITERATIVE-NEXT: [[TMP20]] = and i64 [[ACTIVEBITS]], [[TMP19]] +; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP20]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP21]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]] ; IR-ITERATIVE: ComputeEnd: -; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP24]], label [[TMP10:%.*]], label [[TMP12]] +; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP22]], label [[TMP10:%.*]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe( ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() -; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]] +; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP28:%.*]] ; IR-DPP: 2: ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -83,34 +81,29 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_scope_agent_scop ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) -; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 
@llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -2147483648) -; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float -; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP14:%.*]] = fadd float [[TMP11]], [[TMP13]] -; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP16:%.*]] = fadd float [[TMP14]], [[TMP15]] -; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP18:%.*]] = fadd float [[TMP16]], [[TMP17]] -; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP20:%.*]] = fadd float [[TMP18]], [[TMP19]] -; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP22:%.*]] = fadd float [[TMP20]], [[TMP21]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP24:%.*]] = fadd float [[TMP22]], [[TMP23]] -; IR-DPP-NEXT: [[TMP25:%.*]] = bitcast float [[TMP24]] to i32 -; IR-DPP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP25]], i32 63) -; IR-DPP-NEXT: [[TMP27:%.*]] = bitcast i32 [[TMP26]] to float -; IR-DPP-NEXT: [[TMP28:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP27]]) -; IR-DPP-NEXT: [[TMP29:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-DPP-NEXT: br i1 [[TMP29]], label [[TMP30:%.*]], label [[TMP32:%.*]] -; IR-DPP: 30: -; IR-DPP-NEXT: [[TMP31:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP28]] syncscope("agent") 
monotonic, align 4 -; IR-DPP-NEXT: br label [[TMP32]] -; IR-DPP: 32: -; IR-DPP-NEXT: br label [[TMP33]] -; IR-DPP: 33: +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP11:%.*]] = fadd float [[TMP9]], [[TMP10]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP13:%.*]] = fadd float [[TMP11]], [[TMP12]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP15:%.*]] = fadd float [[TMP13]], [[TMP14]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP19:%.*]] = fadd float [[TMP17]], [[TMP18]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP21:%.*]] = fadd float [[TMP19]], [[TMP20]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) +; IR-DPP-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-DPP-NEXT: br i1 [[TMP24]], label [[TMP25:%.*]], label [[TMP27:%.*]] +; IR-DPP: 25: +; IR-DPP-NEXT: [[TMP26:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP23]] syncscope("agent") monotonic, align 4 +; IR-DPP-NEXT: br label [[TMP27]] +; IR-DPP: 27: +; 
IR-DPP-NEXT: br label [[TMP28]] +; IR-DPP: 28: ; IR-DPP-NEXT: ret void ; %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4 @@ -184,33 +177,31 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_one_as_scope_uns ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: -; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP19:%.*]] syncscope("one-as") monotonic, align 4 +; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP17:%.*]] syncscope("one-as") monotonic, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: br label [[TMP13]] ; IR-ITERATIVE: 13: ; IR-ITERATIVE-NEXT: ret void ; IR-ITERATIVE: ComputeLoop: -; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP19]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP22:%.*]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP17]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP20:%.*]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP16]], i32 [[TMP15]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = bitcast i32 [[TMP17]] to float -; IR-ITERATIVE-NEXT: [[TMP19]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = shl 
i64 1, [[TMP14]] -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = xor i64 [[TMP20]], -1 -; IR-ITERATIVE-NEXT: [[TMP22]] = and i64 [[ACTIVEBITS]], [[TMP21]] -; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = icmp eq i64 [[TMP22]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP23]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]] +; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP15]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP17]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = shl i64 1, [[TMP14]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], -1 +; IR-ITERATIVE-NEXT: [[TMP20]] = and i64 [[ACTIVEBITS]], [[TMP19]] +; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP20]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP21]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]] ; IR-ITERATIVE: ComputeEnd: -; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP24]], label [[TMP10:%.*]], label [[TMP12]] +; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP22]], label [[TMP10:%.*]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp( ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] -; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]] +; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP28:%.*]] ; IR-DPP: 2: ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -218,34 +209,29 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_one_as_scope_uns ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 
@llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -2147483648) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float -; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP15]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP17]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP21]], metadata 
!"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP22]], float [[TMP23]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP25:%.*]] = bitcast float [[TMP24]] to i32 -; IR-DPP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP25]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP27:%.*]] = bitcast i32 [[TMP26]] to float -; IR-DPP-NEXT: [[TMP28:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP27]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP29:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-DPP-NEXT: br i1 [[TMP29]], label [[TMP30:%.*]], label [[TMP32:%.*]] -; IR-DPP: 30: -; IR-DPP-NEXT: [[TMP31:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP28]] syncscope("one-as") monotonic, align 4 -; IR-DPP-NEXT: br label [[TMP32]] -; IR-DPP: 32: -; IR-DPP-NEXT: br label [[TMP33]] -; IR-DPP: 33: +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float 
[[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-DPP-NEXT: br i1 [[TMP24]], label [[TMP25:%.*]], label [[TMP27:%.*]] +; IR-DPP: 25: +; IR-DPP-NEXT: [[TMP26:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP23]] syncscope("one-as") monotonic, align 4 +; IR-DPP-NEXT: br label [[TMP27]] +; IR-DPP: 27: +; IR-DPP-NEXT: br label [[TMP28]] +; IR-DPP: 28: ; IR-DPP-NEXT: ret void ; %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic @@ -319,33 +305,31 @@ define amdgpu_ps void 
@global_atomic_fsub_uni_address_div_value_agent_scope_stri ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: -; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP19:%.*]] syncscope("agent") monotonic, align 4 +; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP17:%.*]] syncscope("agent") monotonic, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: br label [[TMP13]] ; IR-ITERATIVE: 13: ; IR-ITERATIVE-NEXT: ret void ; IR-ITERATIVE: ComputeLoop: -; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP19]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP22:%.*]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP17]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP20:%.*]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP16]], i32 [[TMP15]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = bitcast i32 [[TMP17]] to float -; IR-ITERATIVE-NEXT: [[TMP19]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = shl i64 1, [[TMP14]] -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = xor i64 [[TMP20]], -1 -; IR-ITERATIVE-NEXT: [[TMP22]] = and i64 [[ACTIVEBITS]], [[TMP21]] -; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = icmp eq i64 [[TMP22]], 0 -; 
IR-ITERATIVE-NEXT: br i1 [[TMP23]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]] +; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP15]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP17]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = shl i64 1, [[TMP14]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], -1 +; IR-ITERATIVE-NEXT: [[TMP20]] = and i64 [[ACTIVEBITS]], [[TMP19]] +; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP20]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP21]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]] ; IR-ITERATIVE: ComputeEnd: -; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP24]], label [[TMP10:%.*]], label [[TMP12]] +; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP22]], label [[TMP10:%.*]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp( ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] -; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]] +; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP28:%.*]] ; IR-DPP: 2: ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -353,34 +337,29 @@ define amdgpu_ps void @global_atomic_fsub_uni_address_div_value_agent_scope_stri ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 
-2147483648) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float -; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP15]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP17]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] 
-; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP22]], float [[TMP23]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP25:%.*]] = bitcast float [[TMP24]] to i32 -; IR-DPP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP25]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP27:%.*]] = bitcast i32 [[TMP26]] to float -; IR-DPP-NEXT: [[TMP28:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP27]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP29:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-DPP-NEXT: br i1 [[TMP29]], label [[TMP30:%.*]], label [[TMP32:%.*]] -; IR-DPP: 30: -; IR-DPP-NEXT: [[TMP31:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP28]] syncscope("agent") monotonic, align 4 -; IR-DPP-NEXT: br label [[TMP32]] -; IR-DPP: 32: -; IR-DPP-NEXT: br label [[TMP33]] -; IR-DPP: 33: +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata 
!"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-DPP-NEXT: br i1 [[TMP24]], label [[TMP25:%.*]], label [[TMP27:%.*]] +; IR-DPP: 25: +; IR-DPP-NEXT: [[TMP26:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP23]] syncscope("agent") monotonic, align 4 +; IR-DPP-NEXT: br label [[TMP27]] +; IR-DPP: 27: +; IR-DPP-NEXT: br label [[TMP28]] +; IR-DPP: 28: ; IR-DPP-NEXT: ret void ; %result = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic @@ -426,33 +405,31 @@ define amdgpu_ps void @global_atomic_fmin_uni_address_div_value_agent_scope_unsa ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: -; 
IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[TMP19:%.*]] syncscope("agent") monotonic, align 4 +; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[TMP17:%.*]] syncscope("agent") monotonic, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: br label [[TMP13]] ; IR-ITERATIVE: 13: ; IR-ITERATIVE-NEXT: ret void ; IR-ITERATIVE: ComputeLoop: -; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP19]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP22:%.*]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP17]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP20:%.*]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP16]], i32 [[TMP15]]) -; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = bitcast i32 [[TMP17]] to float -; IR-ITERATIVE-NEXT: [[TMP19]] = call float @llvm.minnum.f32(float [[ACCUMULATOR]], float [[TMP18]]) -; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = shl i64 1, [[TMP14]] -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = xor i64 [[TMP20]], -1 -; IR-ITERATIVE-NEXT: [[TMP22]] = and i64 [[ACTIVEBITS]], [[TMP21]] -; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = icmp eq i64 [[TMP22]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP23]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]] +; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP15]]) +; IR-ITERATIVE-NEXT: [[TMP17]] = call float @llvm.minnum.f32(float [[ACCUMULATOR]], float [[TMP16]]) +; IR-ITERATIVE-NEXT: 
[[TMP18:%.*]] = shl i64 1, [[TMP14]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], -1 +; IR-ITERATIVE-NEXT: [[TMP20]] = and i64 [[ACTIVEBITS]], [[TMP19]] +; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP20]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP21]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]] ; IR-ITERATIVE: ComputeEnd: -; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP24]], label [[TMP10:%.*]], label [[TMP12]] +; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP22]], label [[TMP10:%.*]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe( ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() -; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]] +; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP28:%.*]] ; IR-DPP: 2: ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -460,34 +437,29 @@ define amdgpu_ps void @global_atomic_fmin_uni_address_div_value_agent_scope_unsa ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) -; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 2143289344) -; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float -; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.minnum.f32(float [[TMP11]], float [[TMP13]]) -; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 
0x7FF8000000000000, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.minnum.f32(float [[TMP14]], float [[TMP15]]) -; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.minnum.f32(float [[TMP16]], float [[TMP17]]) -; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.minnum.f32(float [[TMP18]], float [[TMP19]]) -; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.minnum.f32(float [[TMP20]], float [[TMP21]]) -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) -; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.minnum.f32(float [[TMP22]], float [[TMP23]]) -; IR-DPP-NEXT: [[TMP25:%.*]] = bitcast float [[TMP24]] to i32 -; IR-DPP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP25]], i32 63) -; IR-DPP-NEXT: [[TMP27:%.*]] = bitcast i32 [[TMP26]] to float -; IR-DPP-NEXT: [[TMP28:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP27]]) -; IR-DPP-NEXT: [[TMP29:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-DPP-NEXT: br i1 [[TMP29]], label [[TMP30:%.*]], label [[TMP32:%.*]] -; IR-DPP: 30: -; IR-DPP-NEXT: [[TMP31:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[TMP28]] syncscope("agent") monotonic, align 4 -; IR-DPP-NEXT: br label [[TMP32]] -; IR-DPP: 32: -; IR-DPP-NEXT: br label [[TMP33]] -; IR-DPP: 33: +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float 0x7FF8000000000000) +; IR-DPP-NEXT: [[TMP10:%.*]] = call float 
@llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.minnum.f32(float [[TMP9]], float [[TMP10]]) +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.minnum.f32(float [[TMP11]], float [[TMP12]]) +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.minnum.f32(float [[TMP13]], float [[TMP14]]) +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.minnum.f32(float [[TMP15]], float [[TMP16]]) +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.minnum.f32(float [[TMP17]], float [[TMP18]]) +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.minnum.f32(float [[TMP19]], float [[TMP20]]) +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) +; IR-DPP-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-DPP-NEXT: br i1 [[TMP24]], label [[TMP25:%.*]], label [[TMP27:%.*]] +; IR-DPP: 25: +; IR-DPP-NEXT: [[TMP26:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[TMP23]] syncscope("agent") monotonic, align 4 +; IR-DPP-NEXT: br label [[TMP27]] +; IR-DPP: 27: +; IR-DPP-NEXT: br label [[TMP28]] +; IR-DPP: 28: ; IR-DPP-NEXT: ret void ; 
%result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic @@ -553,33 +525,31 @@ define amdgpu_ps void @global_atomic_fmax_uni_address_div_value_agent_scope_unsa ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: -; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP19:%.*]] syncscope("agent") monotonic, align 4 +; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP17:%.*]] syncscope("agent") monotonic, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: br label [[TMP13]] ; IR-ITERATIVE: 13: ; IR-ITERATIVE-NEXT: ret void ; IR-ITERATIVE: ComputeLoop: -; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP19]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP22:%.*]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP17]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP20:%.*]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP16]], i32 [[TMP15]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = bitcast i32 [[TMP17]] to float -; IR-ITERATIVE-NEXT: [[TMP19]] = call float @llvm.experimental.constrained.maxnum.f32(float [[ACCUMULATOR]], float [[TMP18]], metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = shl i64 1, [[TMP14]] -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = xor i64 [[TMP20]], -1 -; IR-ITERATIVE-NEXT: 
[[TMP22]] = and i64 [[ACTIVEBITS]], [[TMP21]] -; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = icmp eq i64 [[TMP22]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP23]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]] +; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP15]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP17]] = call float @llvm.experimental.constrained.maxnum.f32(float [[ACCUMULATOR]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = shl i64 1, [[TMP14]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], -1 +; IR-ITERATIVE-NEXT: [[TMP20]] = and i64 [[ACTIVEBITS]], [[TMP19]] +; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP20]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP21]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]] ; IR-ITERATIVE: ComputeEnd: -; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP24]], label [[TMP10:%.*]], label [[TMP12]] +; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP22]], label [[TMP10:%.*]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp( ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] -; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]] +; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP28:%.*]] ; IR-DPP: 2: ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -587,34 +557,29 @@ define amdgpu_ps void @global_atomic_fmax_uni_address_div_value_agent_scope_unsa ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32 -; 
IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 2143289344) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float -; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP11]], float [[TMP13]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP15]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP16]], float [[TMP17]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP18]], float [[TMP19]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP20]], float [[TMP21]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] -; 
IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP22]], float [[TMP23]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP25:%.*]] = bitcast float [[TMP24]] to i32 -; IR-DPP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP25]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP27:%.*]] = bitcast i32 [[TMP26]] to float -; IR-DPP-NEXT: [[TMP28:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP27]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP29:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-DPP-NEXT: br i1 [[TMP29]], label [[TMP30:%.*]], label [[TMP32:%.*]] -; IR-DPP: 30: -; IR-DPP-NEXT: [[TMP31:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP28]] syncscope("agent") monotonic, align 4 -; IR-DPP-NEXT: br label [[TMP32]] -; IR-DPP: 32: -; IR-DPP-NEXT: br label [[TMP33]] -; IR-DPP: 33: +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float 0x7FF8000000000000) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP9]], float [[TMP10]], metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP11]], float [[TMP12]], metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP13]], float [[TMP14]], metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float 
@llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP15]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP17]], float [[TMP18]], metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP19]], float [[TMP20]], metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-DPP-NEXT: br i1 [[TMP24]], label [[TMP25:%.*]], label [[TMP27:%.*]] +; IR-DPP: 25: +; IR-DPP-NEXT: [[TMP26:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP23]] syncscope("agent") monotonic, align 4 +; IR-DPP-NEXT: br label [[TMP27]] +; IR-DPP: 27: +; IR-DPP-NEXT: br label [[TMP28]] +; IR-DPP: 28: ; IR-DPP-NEXT: ret void ; %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic @@ -688,33 +653,31 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_system_scope_str ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: -; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP19:%.*]] monotonic, align 4 +; 
IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP17:%.*]] monotonic, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: br label [[TMP13]] ; IR-ITERATIVE: 13: ; IR-ITERATIVE-NEXT: ret void ; IR-ITERATIVE: ComputeLoop: -; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP19]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP22:%.*]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP17]], [[COMPUTELOOP]] ] +; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP20:%.*]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP16]], i32 [[TMP15]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = bitcast i32 [[TMP17]] to float -; IR-ITERATIVE-NEXT: [[TMP19]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = shl i64 1, [[TMP14]] -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = xor i64 [[TMP20]], -1 -; IR-ITERATIVE-NEXT: [[TMP22]] = and i64 [[ACTIVEBITS]], [[TMP21]] -; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = icmp eq i64 [[TMP22]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP23]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]] +; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP15]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP17]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") 
#[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = shl i64 1, [[TMP14]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], -1 +; IR-ITERATIVE-NEXT: [[TMP20]] = and i64 [[ACTIVEBITS]], [[TMP19]] +; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP20]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP21]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]] ; IR-ITERATIVE: ComputeEnd: -; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP24]], label [[TMP10:%.*]], label [[TMP12]] +; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-ITERATIVE-NEXT: br i1 [[TMP22]], label [[TMP10:%.*]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp( ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] -; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]] +; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP28:%.*]] ; IR-DPP: 2: ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -722,34 +685,29 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_system_scope_str ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32 -; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -2147483648) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float -; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call float 
@llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP15]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP17]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP22]], float [[TMP23]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP25:%.*]] = bitcast float [[TMP24]] to i32 -; IR-DPP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP25]], i32 63) #[[ATTR8]] 
-; IR-DPP-NEXT: [[TMP27:%.*]] = bitcast i32 [[TMP26]] to float -; IR-DPP-NEXT: [[TMP28:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP27]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP29:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-DPP-NEXT: br i1 [[TMP29]], label [[TMP30:%.*]], label [[TMP32:%.*]] -; IR-DPP: 30: -; IR-DPP-NEXT: [[TMP31:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP28]] monotonic, align 4 -; IR-DPP-NEXT: br label [[TMP32]] -; IR-DPP: 32: -; IR-DPP-NEXT: br label [[TMP33]] -; IR-DPP: 33: +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; 
IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-DPP-NEXT: br i1 [[TMP24]], label [[TMP25:%.*]], label [[TMP27:%.*]] +; IR-DPP: 25: +; IR-DPP-NEXT: [[TMP26:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP23]] monotonic, align 4 +; IR-DPP-NEXT: br label [[TMP27]] +; IR-DPP: 27: +; IR-DPP-NEXT: br label [[TMP28]] +; IR-DPP: 28: ; IR-DPP-NEXT: ret void ; %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll index 6555ceb3ed3386..04df04a5c299b3 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll @@ -706,35 +706,37 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], 
-1 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v3, 1 +; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: v_bfrev_b32_e32 v4, 1 ; GFX9-DPP-NEXT: s_not_b64 exec, exec ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 -; GFX9-DPP-NEXT: v_bfrev_b32_e32 v4, 1 -; GFX9-DPP-NEXT: s_nop 0 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:1 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: s_nop 1 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:2 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:4 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:8 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_bcast:15 row_mask:0xa bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v4, v3 row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-DPP-NEXT: 
v_add_f32_e32 v3, v3, v4 +; GFX9-DPP-NEXT: v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v3, v4, v3 ; GFX9-DPP-NEXT: v_readlane_b32 s4, v3, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 @@ -1910,35 +1912,37 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v3, 1 +; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: v_bfrev_b32_e32 v4, 1 ; GFX9-DPP-NEXT: s_not_b64 exec, exec ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 -; GFX9-DPP-NEXT: v_bfrev_b32_e32 v4, 1 -; GFX9-DPP-NEXT: s_nop 0 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:1 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: s_nop 1 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:2 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:4 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: 
v_mov_b32_dpp v5, v3 row_shr:8 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_bcast:15 row_mask:0xa bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v4, v3 row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v4 +; GFX9-DPP-NEXT: v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v3, v4, v3 ; GFX9-DPP-NEXT: v_readlane_b32 s4, v3, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 @@ -3114,35 +3118,37 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v3, 1 +; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: v_bfrev_b32_e32 v4, 1 ; GFX9-DPP-NEXT: s_not_b64 exec, exec ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 -; GFX9-DPP-NEXT: v_bfrev_b32_e32 v4, 1 -; GFX9-DPP-NEXT: s_nop 0 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:1 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: s_nop 1 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; 
GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:2 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:4 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:8 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_bcast:15 row_mask:0xa bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v4, v3 row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v4 +; GFX9-DPP-NEXT: v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v3, v4, v3 ; GFX9-DPP-NEXT: v_readlane_b32 s4, v3, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 @@ -3834,35 +3840,37 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v3, 1 +; 
GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: v_bfrev_b32_e32 v4, 1 ; GFX9-DPP-NEXT: s_not_b64 exec, exec ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 -; GFX9-DPP-NEXT: v_bfrev_b32_e32 v4, 1 -; GFX9-DPP-NEXT: s_nop 0 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:1 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: s_nop 1 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:2 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:4 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:8 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_bcast:15 row_mask:0xa bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v4, v3 row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v4 +; GFX9-DPP-NEXT: 
v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v3, v4, v3 ; GFX9-DPP-NEXT: v_readlane_b32 s4, v3, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 @@ -5037,35 +5045,37 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v3, 1 +; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: v_bfrev_b32_e32 v4, 1 ; GFX9-DPP-NEXT: s_not_b64 exec, exec ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 -; GFX9-DPP-NEXT: v_bfrev_b32_e32 v4, 1 -; GFX9-DPP-NEXT: s_nop 0 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:1 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: s_nop 1 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:2 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:4 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:8 row_mask:0xf 
bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_bcast:15 row_mask:0xa bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v4, v3 row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v4 +; GFX9-DPP-NEXT: v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v3, v4, v3 ; GFX9-DPP-NEXT: v_readlane_b32 s4, v3, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll index e02d749f1c7a76..005cd3a0021b39 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll @@ -607,42 +607,44 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0x7fc00000 ; GFX9-DPP-NEXT: s_not_b64 exec, exec ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-DPP-NEXT: s_nop 0 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 
row_shr:1 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX9-DPP-NEXT: s_nop 1 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v4 ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:2 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:4 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:8 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_bcast:15 row_mask:0xa bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v4, v3 row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v4 -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v4 +; GFX9-DPP-NEXT: v_mov_b32_dpp v3, v4 
row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX9-DPP-NEXT: v_max_f32_e32 v3, v4, v3 ; GFX9-DPP-NEXT: v_readlane_b32 s4, v3, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 @@ -1726,42 +1728,44 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0x7fc00000 ; GFX9-DPP-NEXT: s_not_b64 exec, exec ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-DPP-NEXT: s_nop 0 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:1 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX9-DPP-NEXT: s_nop 1 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v4 ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:2 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:4 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:4 
row_mask:0xf bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:8 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_bcast:15 row_mask:0xa bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v4, v3 row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v4 -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v4 +; GFX9-DPP-NEXT: v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX9-DPP-NEXT: v_max_f32_e32 v3, v4, v3 ; GFX9-DPP-NEXT: v_readlane_b32 s4, v3, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 @@ -2905,42 +2909,44 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0x7fc00000 ; GFX9-DPP-NEXT: s_not_b64 exec, exec ; GFX9-DPP-NEXT: 
s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-DPP-NEXT: s_nop 0 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:1 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX9-DPP-NEXT: s_nop 1 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v4 ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:2 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:4 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:8 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_bcast:15 row_mask:0xa bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: 
v_mov_b32_dpp v4, v3 row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v4 -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v4 +; GFX9-DPP-NEXT: v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX9-DPP-NEXT: v_max_f32_e32 v3, v4, v3 ; GFX9-DPP-NEXT: v_readlane_b32 s4, v3, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll index 60195ca993312b..3f4779f08e42fe 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll @@ -607,42 +607,44 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0x7fc00000 ; GFX9-DPP-NEXT: s_not_b64 exec, exec ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-DPP-NEXT: s_nop 0 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:1 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX9-DPP-NEXT: s_nop 1 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v4 ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_min_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_min_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: 
v_mov_b32_dpp v5, v3 row_shr:2 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_min_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_min_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:4 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_min_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_min_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:8 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_min_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_min_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_bcast:15 row_mask:0xa bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_min_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_min_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v4, v3 row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v4 -; GFX9-DPP-NEXT: v_min_f32_e32 v3, v3, v4 +; GFX9-DPP-NEXT: v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX9-DPP-NEXT: v_min_f32_e32 v3, v4, v3 ; GFX9-DPP-NEXT: v_readlane_b32 s4, v3, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 @@ -1726,42 +1728,44 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, 
exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0x7fc00000 ; GFX9-DPP-NEXT: s_not_b64 exec, exec ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-DPP-NEXT: s_nop 0 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:1 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX9-DPP-NEXT: s_nop 1 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v4 ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_min_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_min_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:2 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_min_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_min_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:4 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_min_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_min_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:8 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_min_f32_e32 v3, 
v3, v5 +; GFX9-DPP-NEXT: v_min_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_bcast:15 row_mask:0xa bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_min_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_min_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v4, v3 row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v4 -; GFX9-DPP-NEXT: v_min_f32_e32 v3, v3, v4 +; GFX9-DPP-NEXT: v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX9-DPP-NEXT: v_min_f32_e32 v3, v4, v3 ; GFX9-DPP-NEXT: v_readlane_b32 s4, v3, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 @@ -2905,42 +2909,44 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0x7fc00000 ; GFX9-DPP-NEXT: s_not_b64 exec, exec ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-DPP-NEXT: s_nop 0 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:1 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX9-DPP-NEXT: s_nop 1 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v4 ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; 
GFX9-DPP-NEXT: v_min_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_min_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:2 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_min_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_min_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:4 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_min_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_min_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:8 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_min_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_min_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 0x7fc00000 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_bcast:15 row_mask:0xa bank_mask:0xf +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf ; GFX9-DPP-NEXT: v_max_f32_e32 v5, v5, v5 -; GFX9-DPP-NEXT: v_min_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_min_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v4, v3 row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-DPP-NEXT: v_max_f32_e32 v4, v4, v4 -; GFX9-DPP-NEXT: v_min_f32_e32 v3, v3, v4 +; GFX9-DPP-NEXT: v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-DPP-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX9-DPP-NEXT: v_min_f32_e32 v3, v4, v3 ; GFX9-DPP-NEXT: v_readlane_b32 s4, v3, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 diff --git 
a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll index 5cb57703c01d99..64650e2733a00d 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll @@ -784,35 +784,37 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v3, 1 +; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: v_bfrev_b32_e32 v4, 1 ; GFX9-DPP-NEXT: s_not_b64 exec, exec ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 -; GFX9-DPP-NEXT: v_bfrev_b32_e32 v4, 1 -; GFX9-DPP-NEXT: s_nop 0 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:1 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: s_nop 1 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:2 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:4 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: 
v_mov_b32_dpp v5, v3 row_shr:8 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_bcast:15 row_mask:0xa bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v4, v3 row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v4 +; GFX9-DPP-NEXT: v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v3, v4, v3 ; GFX9-DPP-NEXT: v_readlane_b32 s4, v3, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 @@ -2014,35 +2016,37 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v3, 1 +; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: v_bfrev_b32_e32 v4, 1 ; GFX9-DPP-NEXT: s_not_b64 exec, exec ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 -; GFX9-DPP-NEXT: v_bfrev_b32_e32 v4, 1 -; GFX9-DPP-NEXT: s_nop 0 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:1 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: s_nop 1 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; 
GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:2 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:4 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:8 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_bcast:15 row_mask:0xa bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v4, v3 row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v4 +; GFX9-DPP-NEXT: v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v3, v4, v3 ; GFX9-DPP-NEXT: v_readlane_b32 s4, v3, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 @@ -3244,35 +3248,37 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v3, 1 +; 
GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: v_bfrev_b32_e32 v4, 1 ; GFX9-DPP-NEXT: s_not_b64 exec, exec ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 -; GFX9-DPP-NEXT: v_bfrev_b32_e32 v4, 1 -; GFX9-DPP-NEXT: s_nop 0 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:1 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: s_nop 1 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:2 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:4 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:8 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_bcast:15 row_mask:0xa bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v4, v3 row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v4 +; GFX9-DPP-NEXT: 
v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v3, v4, v3 ; GFX9-DPP-NEXT: v_readlane_b32 s4, v3, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 @@ -4016,35 +4022,37 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v3, 1 +; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: v_bfrev_b32_e32 v4, 1 ; GFX9-DPP-NEXT: s_not_b64 exec, exec ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 -; GFX9-DPP-NEXT: v_bfrev_b32_e32 v4, 1 -; GFX9-DPP-NEXT: s_nop 0 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:1 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: s_nop 1 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:2 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:4 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:8 row_mask:0xf 
bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_bcast:15 row_mask:0xa bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v4, v3 row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v4 +; GFX9-DPP-NEXT: v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v3, v4, v3 ; GFX9-DPP-NEXT: v_readlane_b32 s4, v3, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 @@ -5245,35 +5253,37 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v3, 1 +; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-DPP-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-DPP-NEXT: s_not_b64 exec, exec +; GFX9-DPP-NEXT: v_bfrev_b32_e32 v4, 1 ; GFX9-DPP-NEXT: s_not_b64 exec, exec ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 -; GFX9-DPP-NEXT: v_bfrev_b32_e32 v4, 1 -; GFX9-DPP-NEXT: s_nop 0 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:1 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: s_nop 1 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:1 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: 
v_mov_b32_dpp v5, v3 row_shr:2 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:2 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:4 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:4 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_shr:8 row_mask:0xf bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_shr:8 row_mask:0xf bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: v_bfrev_b32_e32 v5, 1 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v3 row_bcast:15 row_mask:0xa bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX9-DPP-NEXT: v_mov_b32_dpp v5, v4 row_bcast:15 row_mask:0xa bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX9-DPP-NEXT: s_nop 1 -; GFX9-DPP-NEXT: v_mov_b32_dpp v4, v3 row_bcast:31 row_mask:0xc bank_mask:0xf -; GFX9-DPP-NEXT: v_add_f32_e32 v3, v3, v4 +; GFX9-DPP-NEXT: v_mov_b32_dpp v3, v4 row_bcast:31 row_mask:0xc bank_mask:0xf +; GFX9-DPP-NEXT: v_add_f32_e32 v3, v4, v3 ; GFX9-DPP-NEXT: v_readlane_b32 s4, v3, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir index fea02822da8bfa..c8fee5d334429e 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir @@ -150,22 +150,23 @@ body: | %1(s8) = G_TRUNC %0(s32) ; Check that the operation is performed for 32 bits - ; CLZ: [[COUNT:%[0-9]+]]:_(s32) = G_CTLZ - ; 
CLZ-NOT: G_CTLZ_ZERO_UNDEF + ; CHECK: [[BITDIFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[R32:%[0-9]+]]:_(s32) = G_SHL [[X32]], [[BITDIFF]] ; LIBCALLS-NOT: G_CTLZ ; LIBCALLS: ADJCALLSTACKDOWN - ; LIBCALLS: $r0 = COPY [[X32]] + ; LIBCALLS: $r0 = COPY [[R32]] ; LIBCALLS: BL &__clzsi2, {{.*}}, implicit $r0, implicit-def $r0 ; LIBCALLS: [[COUNT:%[0-9]+]]:_(s32) = COPY $r0 ; LIBCALLS: ADJCALLSTACKUP ; LIBCALLS-NOT: G_CTLZ - ; CHECK: [[BITDIFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[R32:%[0-9]+]]:_(s32) = G_SUB [[COUNT]], [[BITDIFF]] + ; CLZ: [[COUNT:%[0-9]+]]:_(s32) = G_CTLZ [[R32]] + ; CLZ-NOT: G_CTLZ_ZERO_UNDEF %2(s8) = G_CTLZ_ZERO_UNDEF %1 - ; CHECK: [[SHIFTEDR:%[0-9]+]]:_(s32) = G_SHL [[R32]], [[BITDIFF]] - ; CHECK: [[R:%[0-9]+]]:_(s32) = G_ASHR [[SHIFTEDR]], [[BITDIFF]] - ; CHECK: $r0 = COPY [[R]] + ; LIBCALLS: [[SHIFTEDR:%[0-9]+]]:_(s32) = G_SHL [[COUNT]], [[BITDIFF]] + ; LIBCALLS: [[R:%[0-9]+]]:_(s32) = G_ASHR [[SHIFTEDR]], [[BITDIFF]] + ; CLZ: $r0 = COPY [[COUNT]] + ; LIBCALLS: $r0 = COPY [[R]] %3(s32) = G_SEXT %2(s8) $r0 = COPY %3(s32) BX_RET 14, $noreg, implicit $r0 diff --git a/llvm/test/CodeGen/ARM/sdiv_shl.ll b/llvm/test/CodeGen/ARM/sdiv_shl.ll new file mode 100644 index 00000000000000..01615ce2c46af5 --- /dev/null +++ b/llvm/test/CodeGen/ARM/sdiv_shl.ll @@ -0,0 +1,106 @@ +; RUN: llc -mtriple armv7-linux -mattr=+neon %s -o - | FileCheck %s --check-prefix=LE +; RUN: llc -mtriple armebv7-linux -mattr=+neon %s -o - | FileCheck %s --check-prefix=BE + +; The key is the last vrev64 should be vrev64.16 instead of vrev64.32 + +define void @sdiv_shl(ptr %x, ptr %y) nounwind { +; LE-LABEL: sdiv_shl: +; LE: @ %bb.0: @ %entry +; LE-NEXT: adr r2, .LCPI0_0 +; LE-NEXT: vld1.64 {d18, d19}, [r1] +; LE-NEXT: adr r1, .LCPI0_1 +; LE-NEXT: vld1.64 {d16, d17}, [r2:128] +; LE-NEXT: vshr.s16 q10, q9, #15 +; LE-NEXT: vneg.s16 q8, q8 +; LE-NEXT: vld1.64 {d22, d23}, [r1:128] +; LE-NEXT: adr r1, .LCPI0_2 +; LE-NEXT: vshl.u16 q8, q10, q8 +; LE-NEXT: vneg.s16 q10, q11 
+; LE-NEXT: vadd.i16 q8, q9, q8 +; LE-NEXT: vshl.s16 q8, q8, q10 +; LE-NEXT: vld1.64 {d20, d21}, [r1:128] +; LE-NEXT: vbit q8, q9, q10 +; LE-NEXT: vst1.64 {d16, d17}, [r0] +; LE: .LCPI0_0: +; LE-NEXT: .short 16 @ 0x10 +; LE-NEXT: .short 14 @ 0xe +; LE-NEXT: .short 15 @ 0xf +; LE-NEXT: .short 13 @ 0xd +; LE-NEXT: .short 12 @ 0xc +; LE-NEXT: .short 10 @ 0xa +; LE-NEXT: .short 11 @ 0xb +; LE-NEXT: .short 9 @ 0x9 +; LE-NEXT: .LCPI0_1: +; LE-NEXT: .short 0 @ 0x0 +; LE-NEXT: .short 2 @ 0x2 +; LE-NEXT: .short 1 @ 0x1 +; LE-NEXT: .short 3 @ 0x3 +; LE-NEXT: .short 4 @ 0x4 +; LE-NEXT: .short 6 @ 0x6 +; LE-NEXT: .short 5 @ 0x5 +; LE-NEXT: .short 7 @ 0x7 +; LE-NEXT: .LCPI0_2: +; LE-NEXT: .short 65535 @ 0xffff +; LE-NEXT: .short 0 @ 0x0 +; LE-NEXT: .short 0 @ 0x0 +; LE-NEXT: .short 0 @ 0x0 +; LE-NEXT: .short 0 @ 0x0 +; LE-NEXT: .short 0 @ 0x0 +; LE-NEXT: .short 0 @ 0x0 +; LE-NEXT: .short 0 @ 0x0 +; +; BE-LABEL: sdiv_shl: +; BE: @ %bb.0: @ %entry +; BE-NEXT: adr r2, .LCPI0_0 +; BE-NEXT: vld1.64 {d18, d19}, [r1] +; BE-NEXT: adr r1, .LCPI0_1 +; BE-NEXT: vld1.64 {d16, d17}, [r2:128] +; BE-NEXT: vrev64.16 q8, q8 +; BE-NEXT: vrev64.16 q9, q9 +; BE-NEXT: vneg.s16 q8, q8 +; BE-NEXT: vld1.64 {d20, d21}, [r1:128] +; BE-NEXT: adr r1, .LCPI0_2 +; BE-NEXT: vshr.s16 q11, q9, #15 +; BE-NEXT: vrev64.16 q10, q10 +; BE-NEXT: vshl.u16 q8, q11, q8 +; BE-NEXT: vld1.64 {d22, d23}, [r1:128] +; BE-NEXT: vneg.s16 q10, q10 +; BE-NEXT: vrev64.16 q11, q11 +; BE-NEXT: vadd.i16 q8, q9, q8 +; BE-NEXT: vshl.s16 q8, q8, q10 +; BE-NEXT: vbit q8, q9, q11 +; BE-NEXT: vrev64.16 q8, q8 +; BE-NEXT: vst1.64 {d16, d17}, [r0] +; BE: .LCPI0_0: +; BE-NEXT: .short 16 @ 0x10 +; BE-NEXT: .short 14 @ 0xe +; BE-NEXT: .short 15 @ 0xf +; BE-NEXT: .short 13 @ 0xd +; BE-NEXT: .short 12 @ 0xc +; BE-NEXT: .short 10 @ 0xa +; BE-NEXT: .short 11 @ 0xb +; BE-NEXT: .short 9 @ 0x9 +; BE-NEXT: .LCPI0_1: +; BE-NEXT: .short 0 @ 0x0 +; BE-NEXT: .short 2 @ 0x2 +; BE-NEXT: .short 1 @ 0x1 +; BE-NEXT: .short 3 @ 0x3 +; BE-NEXT: .short 4 @ 0x4 +; 
BE-NEXT: .short 6 @ 0x6 +; BE-NEXT: .short 5 @ 0x5 +; BE-NEXT: .short 7 @ 0x7 +; BE-NEXT: .LCPI0_2: +; BE-NEXT: .short 65535 @ 0xffff +; BE-NEXT: .short 0 @ 0x0 +; BE-NEXT: .short 0 @ 0x0 +; BE-NEXT: .short 0 @ 0x0 +; BE-NEXT: .short 0 @ 0x0 +; BE-NEXT: .short 0 @ 0x0 +; BE-NEXT: .short 0 @ 0x0 +; BE-NEXT: .short 0 @ 0x0 +entry: + %0 = load <8 x i16>, ptr %y, align 8 + %div = sdiv <8 x i16> %0, + store <8 x i16> %div, ptr %x, align 8 + ret void +} diff --git a/llvm/test/CodeGen/Hexagon/bitmanip.ll b/llvm/test/CodeGen/Hexagon/bitmanip.ll index 9ce7f0576506c3..2c21af62d6f394 100644 --- a/llvm/test/CodeGen/Hexagon/bitmanip.ll +++ b/llvm/test/CodeGen/Hexagon/bitmanip.ll @@ -50,13 +50,10 @@ define i16 @ctlz_i16(i16 %a0) #0 { ; CHECK: .cfi_startproc ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: { -; CHECK-NEXT: r0 = zxth(r0) +; CHECK-NEXT: r0 = aslh(r0) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: r0 = cl0(r0) -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: r0 = add(r0,#-16) ; CHECK-NEXT: jumpr r31 ; CHECK-NEXT: } %v0 = tail call i16 @llvm.ctlz.i16(i16 %a0, i1 true) #1 diff --git a/llvm/test/CodeGen/PowerPC/aix-available-externally-linkage-fun.ll b/llvm/test/CodeGen/PowerPC/aix-available-externally-linkage-fun.ll index 0ad229004b8a3d..fd39fb6d165907 100644 --- a/llvm/test/CodeGen/PowerPC/aix-available-externally-linkage-fun.ll +++ b/llvm/test/CodeGen/PowerPC/aix-available-externally-linkage-fun.ll @@ -18,7 +18,6 @@ entry: } ; CHECK: .extern .foo[PR] -; CHECK: .extern foo[DS] ; OBJ: Name: .foo ; OBJ-NEXT: Value (RelocatableAddress): 0x0 @@ -34,18 +33,3 @@ entry: ; OBJ-NEXT: SymbolAlignmentLog2: 0 ; OBJ-NEXT: SymbolType: XTY_ER (0x0) ; OBJ-NEXT: StorageMappingClass: XMC_PR (0x0) - -; OBJ: Name: foo -; OBJ-NEXT: Value (RelocatableAddress): 0x0 -; OBJ-NEXT: Section: N_UNDEF -; OBJ-NEXT: Type: 0x0 -; OBJ-NEXT: StorageClass: C_EXT (0x2) -; OBJ-NEXT: NumberOfAuxEntries: 1 -; OBJ-NEXT: CSECT Auxiliary Entry { -; OBJ-NEXT: Index: [[#NFA+4]] -; OBJ-NEXT: SectionLen: 0 -; OBJ-NEXT: 
ParameterHashIndex: 0x0 -; OBJ-NEXT: TypeChkSectNum: 0x0 -; OBJ-NEXT: SymbolAlignmentLog2: 0 -; OBJ-NEXT: SymbolType: XTY_ER (0x0) -; OBJ-NEXT: StorageMappingClass: XMC_DS (0xA) diff --git a/llvm/test/CodeGen/PowerPC/aix-extern-weak.ll b/llvm/test/CodeGen/PowerPC/aix-extern-weak.ll index ea61fdb022b5c5..173c58567e40b3 100644 --- a/llvm/test/CodeGen/PowerPC/aix-extern-weak.ll +++ b/llvm/test/CodeGen/PowerPC/aix-extern-weak.ll @@ -55,7 +55,6 @@ declare extern_weak void @foo_ext_weak(ptr) ; COMMON-NEXT: .weak .foo_ext_weak_ref[PR] ; COMMON-NEXT: .weak foo_ext_weak_ref[DS] ; COMMON-NEXT: .weak .foo_ext_weak[PR] -; COMMON-NEXT: .weak foo_ext_weak[DS] ; COMMON-NEXT: .toc ; COMMON-NEXT: L..C0: ; COMMON-NEXT: .tc foo_ext_weak_p[TC],foo_ext_weak_p @@ -159,27 +158,6 @@ declare extern_weak void @foo_ext_weak(ptr) ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: Symbol { ; CHECKSYM-NEXT: Index: [[#Index+8]] -; CHECKSYM-NEXT: Name: foo_ext_weak -; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 -; CHECKSYM-NEXT: Section: N_UNDEF -; CHECKSYM-NEXT: Type: 0x0 -; CHECKSYM-NEXT: StorageClass: C_WEAKEXT (0x6F) -; CHECKSYM-NEXT: NumberOfAuxEntries: 1 -; CHECKSYM-NEXT: CSECT Auxiliary Entry { -; CHECKSYM-NEXT: Index: [[#Index+9]] -; CHECKSYM-NEXT: SectionLen: 0 -; CHECKSYM-NEXT: ParameterHashIndex: 0x0 -; CHECKSYM-NEXT: TypeChkSectNum: 0x0 -; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 -; CHECKSYM-NEXT: SymbolType: XTY_ER (0x0) -; CHECKSYM-NEXT: StorageMappingClass: XMC_DS (0xA) -; CHECKSYM32-NEXT: StabInfoIndex: 0x0 -; CHECKSYM32-NEXT: StabSectNum: 0x0 -; CHECKSYM64-NEXT: Auxiliary Type: AUX_CSECT (0xFB) -; CHECKSYM-NEXT: } -; CHECKSYM-NEXT: } -; CHECKSYM-NEXT: Symbol { -; CHECKSYM-NEXT: Index: [[#Index+10]] ; CHECKSYM-NEXT: Name: ; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 ; CHECKSYM-NEXT: Section: .text @@ -187,7 +165,7 @@ declare extern_weak void @foo_ext_weak(ptr) ; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) ; CHECKSYM-NEXT: NumberOfAuxEntries: 1 ; CHECKSYM-NEXT: CSECT Auxiliary Entry { -; 
CHECKSYM-NEXT: Index: [[#Index+11]] +; CHECKSYM-NEXT: Index: [[#Index+9]] ; CHECKSYM-NEXT: SectionLen: 80 ; CHECKSYM-NEXT: ParameterHashIndex: 0x0 ; CHECKSYM-NEXT: TypeChkSectNum: 0x0 @@ -200,7 +178,7 @@ declare extern_weak void @foo_ext_weak(ptr) ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: Symbol { -; CHECKSYM-NEXT: Index: [[#Index+12]] +; CHECKSYM-NEXT: Index: [[#Index+10]] ; CHECKSYM-NEXT: Name: .main ; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 ; CHECKSYM-NEXT: Section: .text @@ -208,8 +186,8 @@ declare extern_weak void @foo_ext_weak(ptr) ; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) ; CHECKSYM-NEXT: NumberOfAuxEntries: 1 ; CHECKSYM-NEXT: CSECT Auxiliary Entry { -; CHECKSYM-NEXT: Index: [[#Index+13]] -; CHECKSYM-NEXT: ContainingCsectSymbolIndex: [[#Index+10]] +; CHECKSYM-NEXT: Index: [[#Index+11]] +; CHECKSYM-NEXT: ContainingCsectSymbolIndex: [[#Index+8]] ; CHECKSYM-NEXT: ParameterHashIndex: 0x0 ; CHECKSYM-NEXT: TypeChkSectNum: 0x0 ; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 @@ -221,7 +199,7 @@ declare extern_weak void @foo_ext_weak(ptr) ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: Symbol { -; CHECKSYM-NEXT: Index: [[#Index+14]] +; CHECKSYM-NEXT: Index: [[#Index+12]] ; CHECKSYM-NEXT: Name: .data ; CHECKSYM-NEXT: Value (RelocatableAddress): 0x50 ; CHECKSYM-NEXT: Section: .data @@ -229,7 +207,7 @@ declare extern_weak void @foo_ext_weak(ptr) ; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) ; CHECKSYM-NEXT: NumberOfAuxEntries: 1 ; CHECKSYM-NEXT: CSECT Auxiliary Entry { -; CHECKSYM-NEXT: Index: [[#Index+15]] +; CHECKSYM-NEXT: Index: [[#Index+13]] ; CHECKSYM32-NEXT: SectionLen: 4 ; CHECKSYM64-NEXT: SectionLen: 8 ; CHECKSYM-NEXT: ParameterHashIndex: 0x0 @@ -244,7 +222,7 @@ declare extern_weak void @foo_ext_weak(ptr) ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: Symbol { -; CHECKSYM-NEXT: Index: [[#Index+16]] +; CHECKSYM-NEXT: Index: [[#Index+14]] ; CHECKSYM-NEXT: Name: foo_ext_weak_p ; CHECKSYM-NEXT: Value (RelocatableAddress): 0x50 ; 
CHECKSYM-NEXT: Section: .data @@ -252,8 +230,8 @@ declare extern_weak void @foo_ext_weak(ptr) ; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) ; CHECKSYM-NEXT: NumberOfAuxEntries: 1 ; CHECKSYM-NEXT: CSECT Auxiliary Entry { -; CHECKSYM-NEXT: Index: [[#Index+17]] -; CHECKSYM-NEXT: ContainingCsectSymbolIndex: [[#Index+14]] +; CHECKSYM-NEXT: Index: [[#Index+15]] +; CHECKSYM-NEXT: ContainingCsectSymbolIndex: [[#Index+12]] ; CHECKSYM-NEXT: ParameterHashIndex: 0x0 ; CHECKSYM-NEXT: TypeChkSectNum: 0x0 ; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 @@ -265,7 +243,7 @@ declare extern_weak void @foo_ext_weak(ptr) ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: Symbol { -; CHECKSYM-NEXT: Index: [[#Index+18]] +; CHECKSYM-NEXT: Index: [[#Index+16]] ; CHECKSYM-NEXT: Name: main ; CHECKSYM32-NEXT: Value (RelocatableAddress): 0x54 ; CHECKSYM64-NEXT: Value (RelocatableAddress): 0x58 @@ -274,7 +252,7 @@ declare extern_weak void @foo_ext_weak(ptr) ; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) ; CHECKSYM-NEXT: NumberOfAuxEntries: 1 ; CHECKSYM-NEXT: CSECT Auxiliary Entry { -; CHECKSYM-NEXT: Index: [[#Index+19]] +; CHECKSYM-NEXT: Index: [[#Index+17]] ; CHECKSYM32-NEXT: SectionLen: 12 ; CHECKSYM64-NEXT: SectionLen: 24 ; CHECKSYM-NEXT: ParameterHashIndex: 0x0 @@ -289,7 +267,7 @@ declare extern_weak void @foo_ext_weak(ptr) ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: Symbol { -; CHECKSYM-NEXT: Index: [[#Index+20]] +; CHECKSYM-NEXT: Index: [[#Index+18]] ; CHECKSYM-NEXT: Name: TOC ; CHECKSYM32-NEXT: Value (RelocatableAddress): 0x60 ; CHECKSYM64-NEXT: Value (RelocatableAddress): 0x70 @@ -298,7 +276,7 @@ declare extern_weak void @foo_ext_weak(ptr) ; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) ; CHECKSYM-NEXT: NumberOfAuxEntries: 1 ; CHECKSYM-NEXT: CSECT Auxiliary Entry { -; CHECKSYM-NEXT: Index: [[#Index+21]] +; CHECKSYM-NEXT: Index: [[#Index+19]] ; CHECKSYM-NEXT: SectionLen: 0 ; CHECKSYM-NEXT: ParameterHashIndex: 0x0 ; CHECKSYM-NEXT: TypeChkSectNum: 0x0 @@ -311,7 +289,7 @@ declare extern_weak 
void @foo_ext_weak(ptr) ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: Symbol { -; CHECKSYM-NEXT: Index: [[#Index+22]] +; CHECKSYM-NEXT: Index: [[#Index+20]] ; CHECKSYM-NEXT: Name: foo_ext_weak_p ; CHECKSYM32-NEXT: Value (RelocatableAddress): 0x60 ; CHECKSYM64-NEXT: Value (RelocatableAddress): 0x70 @@ -320,7 +298,7 @@ declare extern_weak void @foo_ext_weak(ptr) ; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) ; CHECKSYM-NEXT: NumberOfAuxEntries: 1 ; CHECKSYM-NEXT: CSECT Auxiliary Entry { -; CHECKSYM-NEXT: Index: [[#Index+23]] +; CHECKSYM-NEXT: Index: [[#Index+21]] ; CHECKSYM32-NEXT: SectionLen: 4 ; CHECKSYM64-NEXT: SectionLen: 8 ; CHECKSYM-NEXT: ParameterHashIndex: 0x0 @@ -335,7 +313,7 @@ declare extern_weak void @foo_ext_weak(ptr) ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: Symbol { -; CHECKSYM-NEXT: Index: [[#Index+24]] +; CHECKSYM-NEXT: Index: [[#Index+22]] ; CHECKSYM-NEXT: Name: b_w ; CHECKSYM32-NEXT: Value (RelocatableAddress): 0x64 ; CHECKSYM64-NEXT: Value (RelocatableAddress): 0x78 @@ -344,7 +322,7 @@ declare extern_weak void @foo_ext_weak(ptr) ; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) ; CHECKSYM-NEXT: NumberOfAuxEntries: 1 ; CHECKSYM-NEXT: CSECT Auxiliary Entry { -; CHECKSYM-NEXT: Index: [[#Index+25]] +; CHECKSYM-NEXT: Index: [[#Index+23]] ; CHECKSYM32-NEXT: SectionLen: 4 ; CHECKSYM64-NEXT: SectionLen: 8 ; CHECKSYM-NEXT: ParameterHashIndex: 0x0 diff --git a/llvm/test/CodeGen/PowerPC/aix-extern.ll b/llvm/test/CodeGen/PowerPC/aix-extern.ll index b4366dddedb2fe..ff2a803608807c 100644 --- a/llvm/test/CodeGen/PowerPC/aix-extern.ll +++ b/llvm/test/CodeGen/PowerPC/aix-extern.ll @@ -78,7 +78,6 @@ declare i32 @bar_extern(ptr) ; COMMON-NEXT: .extern .bar_ref ; COMMON-NEXT: .extern bar_ref[DS] ; COMMON-NEXT: .extern .bar_extern -; COMMON-NEXT: .extern bar_extern[DS] ; COMMON-NEXT: .toc ; COMMON-NEXT: L..C0: ; COMMON-NEXT: .tc b_e[TC],b_e[UA] @@ -182,27 +181,6 @@ declare i32 @bar_extern(ptr) ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: Symbol { ; 
CHECKSYM-NEXT: Index: [[#Index+8]] -; CHECKSYM-NEXT: Name: bar_extern -; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 -; CHECKSYM-NEXT: Section: N_UNDEF -; CHECKSYM-NEXT: Type: 0x0 -; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) -; CHECKSYM-NEXT: NumberOfAuxEntries: 1 -; CHECKSYM-NEXT: CSECT Auxiliary Entry { -; CHECKSYM-NEXT: Index: [[#Index+9]] -; CHECKSYM-NEXT: SectionLen: 0 -; CHECKSYM-NEXT: ParameterHashIndex: 0x0 -; CHECKSYM-NEXT: TypeChkSectNum: 0x0 -; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 -; CHECKSYM-NEXT: SymbolType: XTY_ER (0x0) -; CHECKSYM-NEXT: StorageMappingClass: XMC_DS (0xA) -; CHECKSYM32-NEXT: StabInfoIndex: 0x0 -; CHECKSYM32-NEXT: StabSectNum: 0x0 -; CHECKSYM64-NEXT: Auxiliary Type: AUX_CSECT (0xFB) -; CHECKSYM-NEXT: } -; CHECKSYM-NEXT: } -; CHECKSYM-NEXT: Symbol { -; CHECKSYM-NEXT: Index: [[#Index+10]] ; CHECKSYM-NEXT: Name: ; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 ; CHECKSYM-NEXT: Section: .text @@ -210,7 +188,7 @@ declare i32 @bar_extern(ptr) ; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) ; CHECKSYM-NEXT: NumberOfAuxEntries: 1 ; CHECKSYM-NEXT: CSECT Auxiliary Entry { -; CHECKSYM-NEXT: Index: [[#Index+11]] +; CHECKSYM-NEXT: Index: [[#Index+9]] ; CHECKSYM-NEXT: SectionLen: 112 ; CHECKSYM-NEXT: ParameterHashIndex: 0x0 ; CHECKSYM-NEXT: TypeChkSectNum: 0x0 @@ -223,7 +201,7 @@ declare i32 @bar_extern(ptr) ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: Symbol { -; CHECKSYM-NEXT: Index: [[#Index+12]] +; CHECKSYM-NEXT: Index: [[#Index+10]] ; CHECKSYM-NEXT: Name: .foo ; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 ; CHECKSYM-NEXT: Section: .text @@ -231,8 +209,8 @@ declare i32 @bar_extern(ptr) ; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) ; CHECKSYM-NEXT: NumberOfAuxEntries: 1 ; CHECKSYM-NEXT: CSECT Auxiliary Entry { -; CHECKSYM-NEXT: Index: [[#Index+13]] -; CHECKSYM-NEXT: ContainingCsectSymbolIndex: [[#Index+10]] +; CHECKSYM-NEXT: Index: [[#Index+11]] +; CHECKSYM-NEXT: ContainingCsectSymbolIndex: [[#Index+8]] ; CHECKSYM-NEXT: 
ParameterHashIndex: 0x0 ; CHECKSYM-NEXT: TypeChkSectNum: 0x0 ; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 @@ -244,7 +222,7 @@ declare i32 @bar_extern(ptr) ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: Symbol { -; CHECKSYM-NEXT: Index: [[#Index+14]] +; CHECKSYM-NEXT: Index: [[#Index+12]] ; CHECKSYM-NEXT: Name: .main ; CHECKSYM-NEXT: Value (RelocatableAddress): 0x10 ; CHECKSYM-NEXT: Section: .text @@ -252,8 +230,8 @@ declare i32 @bar_extern(ptr) ; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) ; CHECKSYM-NEXT: NumberOfAuxEntries: 1 ; CHECKSYM-NEXT: CSECT Auxiliary Entry { -; CHECKSYM-NEXT: Index: [[#Index+15]] -; CHECKSYM-NEXT: ContainingCsectSymbolIndex: [[#Index+10]] +; CHECKSYM-NEXT: Index: [[#Index+13]] +; CHECKSYM-NEXT: ContainingCsectSymbolIndex: [[#Index+8]] ; CHECKSYM-NEXT: ParameterHashIndex: 0x0 ; CHECKSYM-NEXT: TypeChkSectNum: 0x0 ; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 @@ -265,7 +243,7 @@ declare i32 @bar_extern(ptr) ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: Symbol { -; CHECKSYM-NEXT: Index: [[#Index+16]] +; CHECKSYM-NEXT: Index: [[#Index+14]] ; CHECKSYM-NEXT: Name: .data ; CHECKSYM-NEXT: Value (RelocatableAddress): 0x70 ; CHECKSYM-NEXT: Section: .data @@ -273,7 +251,7 @@ declare i32 @bar_extern(ptr) ; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) ; CHECKSYM-NEXT: NumberOfAuxEntries: 1 ; CHECKSYM-NEXT: CSECT Auxiliary Entry { -; CHECKSYM-NEXT: Index: [[#Index+17]] +; CHECKSYM-NEXT: Index: [[#Index+15]] ; CHECKSYM32-NEXT: SectionLen: 4 ; CHECKSYM64-NEXT: SectionLen: 8 ; CHECKSYM-NEXT: ParameterHashIndex: 0x0 @@ -288,7 +266,7 @@ declare i32 @bar_extern(ptr) ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: Symbol { -; CHECKSYM-NEXT: Index: [[#Index+18]] +; CHECKSYM-NEXT: Index: [[#Index+16]] ; CHECKSYM-NEXT: Name: bar_p ; CHECKSYM-NEXT: Value (RelocatableAddress): 0x70 ; CHECKSYM-NEXT: Section: .data @@ -296,8 +274,8 @@ declare i32 @bar_extern(ptr) ; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) ; CHECKSYM-NEXT: NumberOfAuxEntries: 1 ; 
CHECKSYM-NEXT: CSECT Auxiliary Entry { -; CHECKSYM-NEXT: Index: [[#Index+19]] -; CHECKSYM-NEXT: ContainingCsectSymbolIndex: [[#Index+16]] +; CHECKSYM-NEXT: Index: [[#Index+17]] +; CHECKSYM-NEXT: ContainingCsectSymbolIndex: [[#Index+14]] ; CHECKSYM-NEXT: ParameterHashIndex: 0x0 ; CHECKSYM-NEXT: TypeChkSectNum: 0x0 ; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 @@ -309,7 +287,7 @@ declare i32 @bar_extern(ptr) ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: Symbol { -; CHECKSYM-NEXT: Index: [[#Index+20]] +; CHECKSYM-NEXT: Index: [[#Index+18]] ; CHECKSYM-NEXT: Name: foo ; CHECKSYM32-NEXT: Value (RelocatableAddress): 0x74 ; CHECKSYM64-NEXT: Value (RelocatableAddress): 0x78 @@ -318,7 +296,7 @@ declare i32 @bar_extern(ptr) ; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) ; CHECKSYM-NEXT: NumberOfAuxEntries: 1 ; CHECKSYM-NEXT: CSECT Auxiliary Entry { -; CHECKSYM-NEXT: Index: [[#Index+21]] +; CHECKSYM-NEXT: Index: [[#Index+19]] ; CHECKSYM32-NEXT: SectionLen: 12 ; CHECKSYM64-NEXT: SectionLen: 24 ; CHECKSYM-NEXT: ParameterHashIndex: 0x0 @@ -333,7 +311,7 @@ declare i32 @bar_extern(ptr) ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: Symbol { -; CHECKSYM-NEXT: Index: [[#Index+22]] +; CHECKSYM-NEXT: Index: [[#Index+20]] ; CHECKSYM-NEXT: Name: main ; CHECKSYM32-NEXT: Value (RelocatableAddress): 0x80 ; CHECKSYM64-NEXT: Value (RelocatableAddress): 0x90 @@ -342,7 +320,7 @@ declare i32 @bar_extern(ptr) ; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) ; CHECKSYM-NEXT: NumberOfAuxEntries: 1 ; CHECKSYM-NEXT: CSECT Auxiliary Entry { -; CHECKSYM-NEXT: Index: [[#Index+23]] +; CHECKSYM-NEXT: Index: [[#Index+21]] ; CHECKSYM32-NEXT: SectionLen: 12 ; CHECKSYM64-NEXT: SectionLen: 24 ; CHECKSYM-NEXT: ParameterHashIndex: 0x0 @@ -357,7 +335,7 @@ declare i32 @bar_extern(ptr) ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: Symbol { -; CHECKSYM-NEXT: Index: [[#Index+24]] +; CHECKSYM-NEXT: Index: [[#Index+22]] ; CHECKSYM-NEXT: Name: TOC ; CHECKSYM32-NEXT: Value (RelocatableAddress): 0x8C ; 
CHECKSYM64-NEXT: Value (RelocatableAddress): 0xA8 @@ -366,7 +344,7 @@ declare i32 @bar_extern(ptr) ; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) ; CHECKSYM-NEXT: NumberOfAuxEntries: 1 ; CHECKSYM-NEXT: CSECT Auxiliary Entry { -; CHECKSYM-NEXT: Index: [[#Index+25]] +; CHECKSYM-NEXT: Index: [[#Index+23]] ; CHECKSYM-NEXT: SectionLen: 0 ; CHECKSYM-NEXT: ParameterHashIndex: 0x0 ; CHECKSYM-NEXT: TypeChkSectNum: 0x0 @@ -379,7 +357,7 @@ declare i32 @bar_extern(ptr) ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: Symbol { -; CHECKSYM-NEXT: Index: [[#Index+26]] +; CHECKSYM-NEXT: Index: [[#Index+24]] ; CHECKSYM-NEXT: Name: b_e ; CHECKSYM32-NEXT: Value (RelocatableAddress): 0x8C ; CHECKSYM64-NEXT: Value (RelocatableAddress): 0xA8 @@ -388,7 +366,7 @@ declare i32 @bar_extern(ptr) ; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) ; CHECKSYM-NEXT: NumberOfAuxEntries: 1 ; CHECKSYM-NEXT: CSECT Auxiliary Entry { -; CHECKSYM-NEXT: Index: [[#Index+27]] +; CHECKSYM-NEXT: Index: [[#Index+25]] ; CHECKSYM32-NEXT: SectionLen: 4 ; CHECKSYM64-NEXT: SectionLen: 8 ; CHECKSYM-NEXT: ParameterHashIndex: 0x0 @@ -403,7 +381,7 @@ declare i32 @bar_extern(ptr) ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: } ; CHECKSYM-NEXT: Symbol { -; CHECKSYM-NEXT: Index: [[#Index+28]] +; CHECKSYM-NEXT: Index: [[#Index+26]] ; CHECKSYM-NEXT: Name: bar_p ; CHECKSYM32-NEXT: Value (RelocatableAddress): 0x90 ; CHECKSYM64-NEXT: Value (RelocatableAddress): 0xB0 @@ -412,7 +390,7 @@ declare i32 @bar_extern(ptr) ; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) ; CHECKSYM-NEXT: NumberOfAuxEntries: 1 ; CHECKSYM-NEXT: CSECT Auxiliary Entry { -; CHECKSYM-NEXT: Index: [[#Index+29]] +; CHECKSYM-NEXT: Index: [[#Index+27]] ; CHECKSYM32-NEXT: SectionLen: 4 ; CHECKSYM64-NEXT: SectionLen: 8 ; CHECKSYM-NEXT: ParameterHashIndex: 0x0 diff --git a/llvm/test/CodeGen/PowerPC/aix-text-ref.ll b/llvm/test/CodeGen/PowerPC/aix-text-ref.ll index 175c8ccd2a49b2..0dfa6182402761 100644 --- a/llvm/test/CodeGen/PowerPC/aix-text-ref.ll +++ 
b/llvm/test/CodeGen/PowerPC/aix-text-ref.ll @@ -14,7 +14,5 @@ entry: declare i32 @text(...) ; CHECK32: 00000000 *UND* 00000000 (idx: {{[[:digit:]]*}}) .text[PR] -; CHECK32: 00000000 *UND* 00000000 (idx: {{[[:digit:]]*}}) text[DS] ; CHECK64: 0000000000000000 *UND* 0000000000000000 (idx: {{[[:digit:]]*}}) .text[PR] -; CHECK64: 0000000000000000 *UND* 0000000000000000 (idx: {{[[:digit:]]*}}) text[DS] diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc.ll index 6599debbd41b4e..950d65b7e2913e 100644 --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc.ll +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc.ll @@ -38,7 +38,7 @@ declare i32 @bar(i32) ; OBJ-NEXT: TimeStamp: None (0x0) ; OBJ32-NEXT: SymbolTableOffset: 0x13C ; OBJ64-NEXT: SymbolTableOffset: 0x1B8 -; OBJ-NEXT: SymbolTableEntries: [[#NFA+27]] +; OBJ-NEXT: SymbolTableEntries: [[#NFA+25]] ; OBJ-NEXT: OptionalHeaderSize: 0x0 ; OBJ-NEXT: Flags: 0x0 ; OBJ-NEXT: } @@ -88,7 +88,7 @@ declare i32 @bar(i32) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { ; RELOC-NEXT: Virtual Address: 0x1A -; RELOC-NEXT: Symbol: globalA ([[#NFA+23]]) +; RELOC-NEXT: Symbol: globalA ([[#NFA+21]]) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 16 @@ -96,7 +96,7 @@ declare i32 @bar(i32) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { ; RELOC-NEXT: Virtual Address: 0x1E -; RELOC-NEXT: Symbol: globalB ([[#NFA+25]]) +; RELOC-NEXT: Symbol: globalB ([[#NFA+23]]) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 16 @@ -106,7 +106,7 @@ declare i32 @bar(i32) ; RELOC-NEXT: Section (index: 2) .data { ; RELOC-NEXT: Relocation { ; RELOC-NEXT: Virtual Address: 0x70 -; RELOC-NEXT: Symbol: arr ([[#NFA+15]]) +; RELOC-NEXT: Symbol: arr ([[#NFA+13]]) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC32-NEXT: Length: 32 @@ -116,7 +116,7 @@ declare i32 @bar(i32) ; RELOC-NEXT: Relocation { ; RELOC32-NEXT: Virtual Address: 0x74 ; RELOC64-NEXT: Virtual 
Address: 0x78 -; RELOC-NEXT: Symbol: .foo ([[#NFA+7]]) +; RELOC-NEXT: Symbol: .foo ([[#NFA+5]]) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC32-NEXT: Length: 32 @@ -126,7 +126,7 @@ declare i32 @bar(i32) ; RELOC-NEXT: Relocation { ; RELOC32-NEXT: Virtual Address: 0x78 ; RELOC64-NEXT: Virtual Address: 0x80 -; RELOC-NEXT: Symbol: TOC ([[#NFA+21]]) +; RELOC-NEXT: Symbol: TOC ([[#NFA+19]]) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC32-NEXT: Length: 32 @@ -136,7 +136,7 @@ declare i32 @bar(i32) ; RELOC-NEXT: Relocation { ; RELOC32-NEXT: Virtual Address: 0x80 ; RELOC64-NEXT: Virtual Address: 0x90 -; RELOC-NEXT: Symbol: globalA ([[#NFA+11]]) +; RELOC-NEXT: Symbol: globalA ([[#NFA+9]]) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC32-NEXT: Length: 32 @@ -146,7 +146,7 @@ declare i32 @bar(i32) ; RELOC-NEXT: Relocation { ; RELOC32-NEXT: Virtual Address: 0x84 ; RELOC64-NEXT: Virtual Address: 0x98 -; RELOC-NEXT: Symbol: globalB ([[#NFA+13]]) +; RELOC-NEXT: Symbol: globalB ([[#NFA+11]]) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC32-NEXT: Length: 32 @@ -203,27 +203,6 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: [[#INDX+2]] -; SYM-NEXT: Name: bar -; SYM-NEXT: Value (RelocatableAddress): 0x0 -; SYM-NEXT: Section: N_UNDEF -; SYM-NEXT: Type: 0x0 -; SYM-NEXT: StorageClass: C_EXT (0x2) -; SYM-NEXT: NumberOfAuxEntries: 1 -; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: [[#INDX+3]] -; SYM-NEXT: SectionLen: 0 -; SYM-NEXT: ParameterHashIndex: 0x0 -; SYM-NEXT: TypeChkSectNum: 0x0 -; SYM-NEXT: SymbolAlignmentLog2: 0 -; SYM-NEXT: SymbolType: XTY_ER (0x0) -; SYM-NEXT: StorageMappingClass: XMC_DS (0xA) -; SYM32-NEXT: StabInfoIndex: 0x0 -; SYM32-NEXT: StabSectNum: 0x0 -; SYM64-NEXT: Auxiliary Type: AUX_CSECT (0xFB) -; SYM-NEXT: } -; SYM-NEXT: } -; SYM-NEXT: Symbol { -; SYM-NEXT: Index: [[#INDX+4]] ; SYM-NEXT: Name: ; SYM-NEXT: Value (RelocatableAddress): 0x0 ; SYM-NEXT: 
Section: .text @@ -231,7 +210,7 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: [[#INDX+5]] +; SYM-NEXT: Index: [[#INDX+3]] ; SYM-NEXT: SectionLen: 64 ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 @@ -244,7 +223,7 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: } ; SYM-NEXT: Symbol { -; SYM-NEXT: Index: [[#INDX+6]] +; SYM-NEXT: Index: [[#INDX+4]] ; SYM-NEXT: Name: .foo ; SYM-NEXT: Value (RelocatableAddress): 0x0 ; SYM-NEXT: Section: .text @@ -252,8 +231,8 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_EXT (0x2) ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: [[#INDX+7]] -; SYM-NEXT: ContainingCsectSymbolIndex: [[#INDX+4]] +; SYM-NEXT: Index: [[#INDX+5]] +; SYM-NEXT: ContainingCsectSymbolIndex: [[#INDX+2]] ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 ; SYM-NEXT: SymbolAlignmentLog2: 0 @@ -265,7 +244,7 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: } ; SYM-NEXT: Symbol { -; SYM-NEXT: Index: [[#INDX+8]] +; SYM-NEXT: Index: [[#INDX+6]] ; SYM-NEXT: Name: .data ; SYM-NEXT: Value (RelocatableAddress): 0x40 ; SYM-NEXT: Section: .data @@ -273,7 +252,7 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: [[#INDX+9]] +; SYM-NEXT: Index: [[#INDX+7]] ; SYM32-NEXT: SectionLen: 52 ; SYM64-NEXT: SectionLen: 56 ; SYM-NEXT: ParameterHashIndex: 0x0 @@ -288,7 +267,7 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: } ; SYM-NEXT: Symbol { -; SYM-NEXT: Index: [[#INDX+10]] +; SYM-NEXT: Index: [[#INDX+8]] ; SYM-NEXT: Name: globalA ; SYM-NEXT: Value (RelocatableAddress): 0x40 ; SYM-NEXT: Section: .data @@ -296,8 +275,8 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_EXT (0x2) ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: [[#INDX+11]] 
-; SYM-NEXT: ContainingCsectSymbolIndex: [[#INDX+8]] +; SYM-NEXT: Index: [[#INDX+9]] +; SYM-NEXT: ContainingCsectSymbolIndex: [[#INDX+6]] ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 ; SYM-NEXT: SymbolAlignmentLog2: 0 @@ -309,7 +288,7 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: } ; SYM-NEXT: Symbol { -; SYM-NEXT: Index: [[#INDX+12]] +; SYM-NEXT: Index: [[#INDX+10]] ; SYM-NEXT: Name: globalB ; SYM-NEXT: Value (RelocatableAddress): 0x44 ; SYM-NEXT: Section: .data @@ -317,8 +296,8 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_EXT (0x2) ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: [[#INDX+13]] -; SYM-NEXT: ContainingCsectSymbolIndex: [[#INDX+8]] +; SYM-NEXT: Index: [[#INDX+11]] +; SYM-NEXT: ContainingCsectSymbolIndex: [[#INDX+6]] ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 ; SYM-NEXT: SymbolAlignmentLog2: 0 @@ -330,7 +309,7 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: } ; SYM-NEXT: Symbol { -; SYM-NEXT: Index: [[#INDX+14]] +; SYM-NEXT: Index: [[#INDX+12]] ; SYM-NEXT: Name: arr ; SYM-NEXT: Value (RelocatableAddress): 0x48 ; SYM-NEXT: Section: .data @@ -338,8 +317,8 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_EXT (0x2) ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: [[#INDX+15]] -; SYM-NEXT: ContainingCsectSymbolIndex: [[#INDX+8]] +; SYM-NEXT: Index: [[#INDX+13]] +; SYM-NEXT: ContainingCsectSymbolIndex: [[#INDX+6]] ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 ; SYM-NEXT: SymbolAlignmentLog2: 0 @@ -351,7 +330,7 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: } ; SYM-NEXT: Symbol { -; SYM-NEXT: Index: [[#INDX+16]] +; SYM-NEXT: Index: [[#INDX+14]] ; SYM-NEXT: Name: p ; SYM-NEXT: Value (RelocatableAddress): 0x70 ; SYM-NEXT: Section: .data @@ -359,8 +338,8 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_EXT (0x2) ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; 
SYM-NEXT: Index: [[#INDX+17]] -; SYM-NEXT: ContainingCsectSymbolIndex: [[#INDX+8]] +; SYM-NEXT: Index: [[#INDX+15]] +; SYM-NEXT: ContainingCsectSymbolIndex: [[#INDX+6]] ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 ; SYM-NEXT: SymbolAlignmentLog2: 0 @@ -372,7 +351,7 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: } ; SYM-NEXT: Symbol { -; SYM-NEXT: Index: [[#INDX+18]] +; SYM-NEXT: Index: [[#INDX+16]] ; SYM-NEXT: Name: foo ; SYM32-NEXT: Value (RelocatableAddress): 0x74 ; SYM64-NEXT: Value (RelocatableAddress): 0x78 @@ -381,7 +360,7 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_EXT (0x2) ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: [[#INDX+19]] +; SYM-NEXT: Index: [[#INDX+17]] ; SYM32-NEXT: SectionLen: 12 ; SYM64-NEXT: SectionLen: 24 ; SYM-NEXT: ParameterHashIndex: 0x0 @@ -396,7 +375,7 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: } ; SYM-NEXT: Symbol { -; SYM-NEXT: Index: [[#INDX+20]] +; SYM-NEXT: Index: [[#INDX+18]] ; SYM-NEXT: Name: TOC ; SYM32-NEXT: Value (RelocatableAddress): 0x80 ; SYM64-NEXT: Value (RelocatableAddress): 0x90 @@ -405,7 +384,7 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: [[#INDX+21]] +; SYM-NEXT: Index: [[#INDX+19]] ; SYM-NEXT: SectionLen: 0 ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 @@ -418,7 +397,7 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: } ; SYM-NEXT: Symbol { -; SYM-NEXT: Index: [[#INDX+22]] +; SYM-NEXT: Index: [[#INDX+20]] ; SYM-NEXT: Name: globalA ; SYM32-NEXT: Value (RelocatableAddress): 0x80 ; SYM64-NEXT: Value (RelocatableAddress): 0x90 @@ -427,7 +406,7 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: [[#INDX+23]] +; SYM-NEXT: Index: [[#INDX+21]] ; SYM32-NEXT: SectionLen: 4 ; SYM64-NEXT: SectionLen: 8 ; 
SYM-NEXT: ParameterHashIndex: 0x0 @@ -442,7 +421,7 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: } ; SYM-NEXT: Symbol { -; SYM-NEXT: Index: [[#INDX+24]] +; SYM-NEXT: Index: [[#INDX+22]] ; SYM-NEXT: Name: globalB ; SYM32-NEXT: Value (RelocatableAddress): 0x84 ; SYM64-NEXT: Value (RelocatableAddress): 0x98 @@ -451,7 +430,7 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: [[#INDX+25]] +; SYM-NEXT: Index: [[#INDX+23]] ; SYM32-NEXT: SectionLen: 4 ; SYM64-NEXT: SectionLen: 8 ; SYM-NEXT: ParameterHashIndex: 0x0 diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-visibility.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-visibility.ll index ddeb5014b8ca7a..7ffd11f485e429 100644 --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-visibility.ll +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-visibility.ll @@ -79,11 +79,8 @@ declare dllexport i32 @bar_e(ptr) ; CHECK: .weak .zoo_weak_extern_h[PR],hidden ; CHECK: .weak zoo_weak_extern_h[DS],hidden ; CHECK: .weak .zoo_weak_extern_e[PR],exported -; CHECK: .weak zoo_weak_extern_e[DS],exported ; CHECK: .extern .bar_h[PR],hidden -; CHECK: .extern bar_h[DS],hidden ; CHECK: .extern .bar_e[PR],exported -; CHECK: .extern bar_e[DS],exported ; AUX32: AuxiliaryHeader { ; AUX32-NEXT: Magic: 0x0 @@ -123,30 +120,12 @@ declare dllexport i32 @bar_e(ptr) ; SYM-NEXT: Type: 0x4000 ; SYM-NEXT: StorageClass: C_WEAKEXT (0x6F) -; SYM: Name: zoo_weak_extern_e -; SYM-NEXT: Value (RelocatableAddress): 0x0 -; SYM-NEXT: Section: N_UNDEF -; SYM-NEXT: Type: 0x4000 -; SYM-NEXT: StorageClass: C_WEAKEXT (0x6F) - -; SYM: Name: bar_h -; SYM-NEXT: Value (RelocatableAddress): 0x0 -; SYM-NEXT: Section: N_UNDEF -; SYM-NEXT: Type: 0x2000 -; SYM-NEXT: StorageClass: C_EXT (0x2) - ; SYM: Name: .bar_e ; SYM-NEXT: Value (RelocatableAddress): 0x0 ; SYM-NEXT: Section: N_UNDEF ; SYM-NEXT: Type: 0x4000 ; SYM-NEXT: StorageClass: C_EXT (0x2) -; SYM: Name: bar_e -; SYM-NEXT: Value 
(RelocatableAddress): 0x0 -; SYM-NEXT: Section: N_UNDEF -; SYM-NEXT: Type: 0x4000 -; SYM-NEXT: StorageClass: C_EXT (0x2) - ; SYM: Name: .foo ; SYM-NEXT: Value (RelocatableAddress): 0x0 ; SYM-NEXT: Section: .text diff --git a/llvm/test/CodeGen/RISCV/atomic-load-store.ll b/llvm/test/CodeGen/RISCV/atomic-load-store.ll index 2d1fc21cda89b0..1586a133568b35 100644 --- a/llvm/test/CodeGen/RISCV/atomic-load-store.ll +++ b/llvm/test/CodeGen/RISCV/atomic-load-store.ll @@ -1,26 +1,26 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s -; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s -; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+seq-cst-trailing-fence -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO-TRAILING-FENCE %s -; RUN: llc -mtriple=riscv32 
-mattr=+a,+experimental-ztso,+seq-cst-trailing-fence -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO-TRAILING-FENCE %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+seq-cst-trailing-fence -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO-TRAILING-FENCE %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso,+seq-cst-trailing-fence -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index 292a2856ed8f53..6d525bd7866ea4 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -135,8 +135,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+m %s -o - | FileCheck --check-prefixes=CHECK,RV64M %s ; RUN: llc -mtriple=riscv64 -mattr=+zmmul %s -o - | FileCheck --check-prefixes=CHECK,RV64ZMMUL %s ; RUN: llc -mtriple=riscv64 -mattr=+m,+zmmul %s -o - | FileCheck --check-prefixes=CHECK,RV64MZMMUL %s -; RUN: llc -mtriple=riscv64 -mattr=+a --riscv-abi-attributes %s -o - | FileCheck --check-prefixes=CHECK,RV64A,A6C %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+seq-cst-trailing-fence --riscv-abi-attributes %s -o - | FileCheck --check-prefixes=CHECK,RV64A,A6S %s +; RUN: llc -mtriple=riscv64 -mattr=+a,no-trailing-seq-cst-fence --riscv-abi-attributes %s -o - | FileCheck --check-prefixes=CHECK,RV64A,A6C %s +; RUN: llc -mtriple=riscv64 -mattr=+a --riscv-abi-attributes %s -o - | FileCheck --check-prefixes=CHECK,RV64A,A6S %s ; RUN: llc -mtriple=riscv64 -mattr=+b %s -o - | FileCheck --check-prefixes=CHECK,RV64B %s ; RUN: llc -mtriple=riscv64 -mattr=+f %s -o - | FileCheck --check-prefixes=CHECK,RV64F %s ; RUN: llc 
-mtriple=riscv64 -mattr=+d %s -o - | FileCheck --check-prefixes=CHECK,RV64D %s diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll index a90c244437a033..8caa64c9572ce7 100644 --- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll @@ -1671,30 +1671,26 @@ define i8 @test_ctlz_i8_zero_undef(i8 %a) nounwind { ; ; RV32ZBB-LABEL: test_ctlz_i8_zero_undef: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: andi a0, a0, 255 +; RV32ZBB-NEXT: slli a0, a0, 24 ; RV32ZBB-NEXT: clz a0, a0 -; RV32ZBB-NEXT: addi a0, a0, -24 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: test_ctlz_i8_zero_undef: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: andi a0, a0, 255 +; RV64ZBB-NEXT: slli a0, a0, 56 ; RV64ZBB-NEXT: clz a0, a0 -; RV64ZBB-NEXT: addi a0, a0, -56 ; RV64ZBB-NEXT: ret ; ; RV32XTHEADBB-LABEL: test_ctlz_i8_zero_undef: ; RV32XTHEADBB: # %bb.0: -; RV32XTHEADBB-NEXT: andi a0, a0, 255 +; RV32XTHEADBB-NEXT: slli a0, a0, 24 ; RV32XTHEADBB-NEXT: th.ff1 a0, a0 -; RV32XTHEADBB-NEXT: addi a0, a0, -24 ; RV32XTHEADBB-NEXT: ret ; ; RV64XTHEADBB-LABEL: test_ctlz_i8_zero_undef: ; RV64XTHEADBB: # %bb.0: -; RV64XTHEADBB-NEXT: andi a0, a0, 255 +; RV64XTHEADBB-NEXT: slli a0, a0, 56 ; RV64XTHEADBB-NEXT: th.ff1 a0, a0 -; RV64XTHEADBB-NEXT: addi a0, a0, -56 ; RV64XTHEADBB-NEXT: ret %tmp = call i8 @llvm.ctlz.i8(i8 %a, i1 true) ret i8 %tmp @@ -1771,30 +1767,26 @@ define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind { ; ; RV32ZBB-LABEL: test_ctlz_i16_zero_undef: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: zext.h a0, a0 +; RV32ZBB-NEXT: slli a0, a0, 16 ; RV32ZBB-NEXT: clz a0, a0 -; RV32ZBB-NEXT: addi a0, a0, -16 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: test_ctlz_i16_zero_undef: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: zext.h a0, a0 +; RV64ZBB-NEXT: slli a0, a0, 48 ; RV64ZBB-NEXT: clz a0, a0 -; RV64ZBB-NEXT: addi a0, a0, -48 ; RV64ZBB-NEXT: ret ; ; RV32XTHEADBB-LABEL: test_ctlz_i16_zero_undef: ; RV32XTHEADBB: # %bb.0: -; RV32XTHEADBB-NEXT: th.extu a0, a0, 15, 0 
+; RV32XTHEADBB-NEXT: slli a0, a0, 16 ; RV32XTHEADBB-NEXT: th.ff1 a0, a0 -; RV32XTHEADBB-NEXT: addi a0, a0, -16 ; RV32XTHEADBB-NEXT: ret ; ; RV64XTHEADBB-LABEL: test_ctlz_i16_zero_undef: ; RV64XTHEADBB: # %bb.0: -; RV64XTHEADBB-NEXT: th.extu a0, a0, 15, 0 +; RV64XTHEADBB-NEXT: slli a0, a0, 48 ; RV64XTHEADBB-NEXT: th.ff1 a0, a0 -; RV64XTHEADBB-NEXT: addi a0, a0, -48 ; RV64XTHEADBB-NEXT: ret %tmp = call i16 @llvm.ctlz.i16(i16 %a, i1 true) ret i16 %tmp diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll index f6a53a9d76dd35..35900f8a0717aa 100644 --- a/llvm/test/CodeGen/RISCV/forced-atomics.ll +++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+seq-cst-trailing-fence < %s | FileCheck %s --check-prefixes=RV32,RV32-NO-ATOMIC +; RUN: llc -mtriple=riscv32 -mattr=+no-trailing-seq-cst-fence < %s | FileCheck %s --check-prefixes=RV32,RV32-NO-ATOMIC ; RUN: llc -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=RV32,RV32-NO-ATOMIC -; RUN: llc -mtriple=riscv32 -mattr=+forced-atomics < %s | FileCheck %s --check-prefixes=RV32,RV32-ATOMIC -; RUN: llc -mtriple=riscv32 -mattr=+forced-atomics,+seq-cst-trailing-fence < %s | FileCheck %s --check-prefixes=RV32,RV32-ATOMIC-TRAILING +; RUN: llc -mtriple=riscv32 -mattr=+forced-atomics,+no-trailing-seq-cst-fence < %s | FileCheck %s --check-prefixes=RV32,RV32-ATOMIC +; RUN: llc -mtriple=riscv32 -mattr=+forced-atomics < %s | FileCheck %s --check-prefixes=RV32,RV32-ATOMIC-TRAILING +; RUN: llc -mtriple=riscv64 -mattr=+no-trailing-seq-cst-fence < %s | FileCheck %s --check-prefixes=RV64,RV64-NO-ATOMIC ; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=RV64,RV64-NO-ATOMIC -; RUN: llc -mtriple=riscv64 -mattr=+seq-cst-trailing-fence < %s | FileCheck %s --check-prefixes=RV64,RV64-NO-ATOMIC -; RUN: llc -mtriple=riscv64 -mattr=+forced-atomics < %s | FileCheck %s 
--check-prefixes=RV64,RV64-ATOMIC -; RUN: llc -mtriple=riscv64 -mattr=+forced-atomics,+seq-cst-trailing-fence < %s | FileCheck %s --check-prefixes=RV64,RV64-ATOMIC-TRAILING +; RUN: llc -mtriple=riscv64 -mattr=+forced-atomics,+no-trailing-seq-cst-fence < %s | FileCheck %s --check-prefixes=RV64,RV64-ATOMIC +; RUN: llc -mtriple=riscv64 -mattr=+forced-atomics < %s | FileCheck %s --check-prefixes=RV64,RV64-ATOMIC-TRAILING define i8 @load8(ptr %p) nounwind { ; RV32-NO-ATOMIC-LABEL: load8: diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll index acd63f24bb8f76..80d3add3859690 100644 --- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll @@ -236,13 +236,13 @@ define signext i32 @findLastSet_i32(i32 signext %a) nounwind { ; ; RV64XTHEADBB-LABEL: findLastSet_i32: ; RV64XTHEADBB: # %bb.0: -; RV64XTHEADBB-NEXT: th.extu a1, a0, 31, 0 +; RV64XTHEADBB-NEXT: slli a1, a0, 32 ; RV64XTHEADBB-NEXT: th.ff1 a1, a1 -; RV64XTHEADBB-NEXT: addiw a1, a1, -32 ; RV64XTHEADBB-NEXT: xori a1, a1, 31 ; RV64XTHEADBB-NEXT: snez a0, a0 -; RV64XTHEADBB-NEXT: addiw a0, a0, -1 +; RV64XTHEADBB-NEXT: addi a0, a0, -1 ; RV64XTHEADBB-NEXT: or a0, a0, a1 +; RV64XTHEADBB-NEXT: sext.w a0, a0 ; RV64XTHEADBB-NEXT: ret %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 true) %2 = xor i32 31, %1 diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll index df413b878172bd..58882525e55c4c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll @@ -2604,9 +2604,8 @@ define @vp_ctlz_nxv1i9( %va, @vp_ctlz_zero_undef_nxv1i9( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i9: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 511 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a1, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 7, v0.t ; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t ; CHECK-NEXT: 
vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t @@ -2614,18 +2613,13 @@ define @vp_ctlz_zero_undef_nxv1i9( %va, @llvm.vp.ctlz.nxv1i9( %va, i1 true, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index f7477da49a3543..eb7f6b1bb6540f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -1,9 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+zba,+zbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+rva22u64 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RVA22U64 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFH +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+zba,+zbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFH +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFH,RV64V +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+rva22u64 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFH,RVA22U64 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFHMIN +; RUN: llc 
-mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFHMIN ; Tests that a floating-point build_vector doesn't try and generate a VID ; instruction @@ -249,6 +251,20 @@ define dso_local void @splat_load_licm(ptr %0) { ; RVA22U64-NEXT: bne a0, a1, .LBB12_1 ; RVA22U64-NEXT: # %bb.2: ; RVA22U64-NEXT: ret +; +; RV64ZVFHMIN-LABEL: splat_load_licm: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: lui a1, 1 +; RV64ZVFHMIN-NEXT: add a1, a0, a1 +; RV64ZVFHMIN-NEXT: lui a2, 263168 +; RV64ZVFHMIN-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64ZVFHMIN-NEXT: vmv.v.x v8, a2 +; RV64ZVFHMIN-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV64ZVFHMIN-NEXT: vse32.v v8, (a0) +; RV64ZVFHMIN-NEXT: addi a0, a0, 16 +; RV64ZVFHMIN-NEXT: bne a0, a1, .LBB12_1 +; RV64ZVFHMIN-NEXT: # %bb.2: +; RV64ZVFHMIN-NEXT: ret br label %2 2: ; preds = %2, %1 @@ -265,12 +281,37 @@ define dso_local void @splat_load_licm(ptr %0) { } define <2 x half> @buildvec_v2f16(half %a, half %b) { -; CHECK-LABEL: buildvec_v2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa1 -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: buildvec_v2f16: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV32ZVFH-NEXT: vfmv.v.f v8, fa0 +; RV32ZVFH-NEXT: vfslide1down.vf v8, v8, fa1 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: buildvec_v2f16: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64ZVFH-NEXT: vfmv.v.f v8, fa0 +; RV64ZVFH-NEXT: vfslide1down.vf v8, v8, fa1 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: buildvec_v2f16: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: fmv.x.w a0, fa1 +; RV32ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV32ZVFHMIN-NEXT: vmv.v.x v8, a1 +; RV32ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: 
buildvec_v2f16: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: fmv.x.w a0, fa1 +; RV64ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64ZVFHMIN-NEXT: vmv.v.x v8, a1 +; RV64ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 +; RV64ZVFHMIN-NEXT: ret %v1 = insertelement <2 x half> poison, half %a, i64 0 %v2 = insertelement <2 x half> %v1, half %b, i64 1 ret <2 x half> %v2 @@ -1297,45 +1338,136 @@ entry: } define <2 x half> @vid_v2f16() { -; CHECK-LABEL: vid_v2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vfcvt.f.x.v v8, v8 -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vid_v2f16: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV32ZVFH-NEXT: vid.v v8 +; RV32ZVFH-NEXT: vfcvt.f.x.v v8, v8 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vid_v2f16: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64ZVFH-NEXT: vid.v v8 +; RV64ZVFH-NEXT: vfcvt.f.x.v v8, v8 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vid_v2f16: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, 245760 +; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV32ZVFHMIN-NEXT: vmv.s.x v8, a0 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vid_v2f16: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: lui a0, 245760 +; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV64ZVFHMIN-NEXT: vmv.s.x v8, a0 +; RV64ZVFHMIN-NEXT: ret ret <2 x half> } define <2 x half> @vid_addend1_v2f16() { -; CHECK-LABEL: vid_addend1_v2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vadd.vi v8, v8, 1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8 -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vid_addend1_v2f16: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV32ZVFH-NEXT: vid.v v8 +; RV32ZVFH-NEXT: vadd.vi v8, v8, 1 +; RV32ZVFH-NEXT: vfcvt.f.x.v v8, v8 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: 
vid_addend1_v2f16: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64ZVFH-NEXT: vid.v v8 +; RV64ZVFH-NEXT: vadd.vi v8, v8, 1 +; RV64ZVFH-NEXT: vfcvt.f.x.v v8, v8 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vid_addend1_v2f16: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, 262148 +; RV32ZVFHMIN-NEXT: addi a0, a0, -1024 +; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV32ZVFHMIN-NEXT: vmv.s.x v8, a0 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vid_addend1_v2f16: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: lui a0, 262148 +; RV64ZVFHMIN-NEXT: addi a0, a0, -1024 +; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV64ZVFHMIN-NEXT: vmv.s.x v8, a0 +; RV64ZVFHMIN-NEXT: ret ret <2 x half> } define <2 x half> @vid_denominator2_v2f16() { -; CHECK-LABEL: vid_denominator2_v2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI28_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI28_0) -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vid_denominator2_v2f16: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: lui a0, %hi(.LCPI28_0) +; RV32ZVFH-NEXT: addi a0, a0, %lo(.LCPI28_0) +; RV32ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV32ZVFH-NEXT: vle16.v v8, (a0) +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vid_denominator2_v2f16: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: lui a0, %hi(.LCPI28_0) +; RV64ZVFH-NEXT: addi a0, a0, %lo(.LCPI28_0) +; RV64ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64ZVFH-NEXT: vle16.v v8, (a0) +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vid_denominator2_v2f16: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: lui a0, 245764 +; RV32ZVFHMIN-NEXT: addi a0, a0, -2048 +; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV32ZVFHMIN-NEXT: vmv.s.x v8, a0 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vid_denominator2_v2f16: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: lui a0, 245764 +; RV64ZVFHMIN-NEXT: addi a0, a0, -2048 +; 
RV64ZVFHMIN-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV64ZVFHMIN-NEXT: vmv.s.x v8, a0 +; RV64ZVFHMIN-NEXT: ret ret <2 x half> } define <2 x half> @vid_step2_v2f16() { -; CHECK-LABEL: vid_step2_v2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vfcvt.f.x.v v8, v8 -; CHECK-NEXT: ret +; RV32ZVFH-LABEL: vid_step2_v2f16: +; RV32ZVFH: # %bb.0: +; RV32ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV32ZVFH-NEXT: vid.v v8 +; RV32ZVFH-NEXT: vadd.vv v8, v8, v8 +; RV32ZVFH-NEXT: vfcvt.f.x.v v8, v8 +; RV32ZVFH-NEXT: ret +; +; RV64ZVFH-LABEL: vid_step2_v2f16: +; RV64ZVFH: # %bb.0: +; RV64ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64ZVFH-NEXT: vid.v v8 +; RV64ZVFH-NEXT: vadd.vv v8, v8, v8 +; RV64ZVFH-NEXT: vfcvt.f.x.v v8, v8 +; RV64ZVFH-NEXT: ret +; +; RV32ZVFHMIN-LABEL: vid_step2_v2f16: +; RV32ZVFHMIN: # %bb.0: +; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV32ZVFHMIN-NEXT: vid.v v8 +; RV32ZVFHMIN-NEXT: vsll.vi v8, v8, 14 +; RV32ZVFHMIN-NEXT: ret +; +; RV64ZVFHMIN-LABEL: vid_step2_v2f16: +; RV64ZVFHMIN: # %bb.0: +; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64ZVFHMIN-NEXT: vid.v v8 +; RV64ZVFHMIN-NEXT: vsll.vi v8, v8, 14 +; RV64ZVFHMIN-NEXT: ret ret <2 x half> } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index b3f4cabd56a0e2..d49929ce90c47a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -1184,46 +1184,95 @@ define <8 x i64> @v8xi64_exact_undef_prefix(i64 %a, i64 %b, i64 %c, i64 %d) vsca define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { -; RV32-LABEL: buildvec_v16i8_loads_contigous: -; RV32: # %bb.0: -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: lbu a2, 1(a0) -; RV32-NEXT: lbu a3, 2(a0) -; RV32-NEXT: lbu a4, 3(a0) -; RV32-NEXT: lbu a5, 4(a0) -; RV32-NEXT: lbu 
a6, 5(a0) -; RV32-NEXT: lbu a7, 6(a0) -; RV32-NEXT: lbu t0, 7(a0) -; RV32-NEXT: lbu t1, 9(a0) -; RV32-NEXT: lbu t2, 10(a0) -; RV32-NEXT: lbu t3, 11(a0) -; RV32-NEXT: lbu t4, 12(a0) -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vlse8.v v8, (a0), zero -; RV32-NEXT: lbu t5, 13(a0) -; RV32-NEXT: lbu t6, 14(a0) -; RV32-NEXT: lbu a0, 15(a0) -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a3 -; RV32-NEXT: vslide1down.vx v8, v8, a4 -; RV32-NEXT: vslide1down.vx v8, v8, a5 -; RV32-NEXT: vlse8.v v9, (a1), zero -; RV32-NEXT: vslide1down.vx v8, v8, a6 -; RV32-NEXT: vslide1down.vx v8, v8, a7 -; RV32-NEXT: vslide1down.vx v10, v8, t0 -; RV32-NEXT: vslide1down.vx v8, v9, t1 -; RV32-NEXT: vslide1down.vx v8, v8, t2 -; RV32-NEXT: vslide1down.vx v8, v8, t3 -; RV32-NEXT: vslide1down.vx v8, v8, t4 -; RV32-NEXT: vslide1down.vx v8, v8, t5 -; RV32-NEXT: vslide1down.vx v8, v8, t6 -; RV32-NEXT: li a1, 255 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: vslidedown.vi v8, v10, 8, v0.t -; RV32-NEXT: ret +; RV32-ONLY-LABEL: buildvec_v16i8_loads_contigous: +; RV32-ONLY: # %bb.0: +; RV32-ONLY-NEXT: addi a1, a0, 8 +; RV32-ONLY-NEXT: lbu a2, 1(a0) +; RV32-ONLY-NEXT: lbu a3, 2(a0) +; RV32-ONLY-NEXT: lbu a4, 3(a0) +; RV32-ONLY-NEXT: lbu a5, 4(a0) +; RV32-ONLY-NEXT: lbu a6, 5(a0) +; RV32-ONLY-NEXT: lbu a7, 6(a0) +; RV32-ONLY-NEXT: lbu t0, 7(a0) +; RV32-ONLY-NEXT: lbu t1, 9(a0) +; RV32-ONLY-NEXT: lbu t2, 10(a0) +; RV32-ONLY-NEXT: lbu t3, 11(a0) +; RV32-ONLY-NEXT: lbu t4, 12(a0) +; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-ONLY-NEXT: vlse8.v v8, (a0), zero +; RV32-ONLY-NEXT: lbu t5, 13(a0) +; RV32-ONLY-NEXT: lbu t6, 14(a0) +; RV32-ONLY-NEXT: lbu a0, 15(a0) +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4 +; RV32-ONLY-NEXT: 
vslide1down.vx v8, v8, a5 +; RV32-ONLY-NEXT: vlse8.v v9, (a1), zero +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7 +; RV32-ONLY-NEXT: vslide1down.vx v10, v8, t0 +; RV32-ONLY-NEXT: vslide1down.vx v8, v9, t1 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t2 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t3 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t4 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t5 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t6 +; RV32-ONLY-NEXT: li a1, 255 +; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-ONLY-NEXT: vmv.s.x v0, a1 +; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0 +; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t +; RV32-ONLY-NEXT: ret +; +; RV32VB-LABEL: buildvec_v16i8_loads_contigous: +; RV32VB: # %bb.0: +; RV32VB-NEXT: lbu a1, 1(a0) +; RV32VB-NEXT: lbu a2, 0(a0) +; RV32VB-NEXT: lbu a3, 2(a0) +; RV32VB-NEXT: lbu a4, 3(a0) +; RV32VB-NEXT: slli a1, a1, 8 +; RV32VB-NEXT: or a1, a2, a1 +; RV32VB-NEXT: slli a3, a3, 16 +; RV32VB-NEXT: slli a4, a4, 24 +; RV32VB-NEXT: or a3, a4, a3 +; RV32VB-NEXT: or a1, a1, a3 +; RV32VB-NEXT: lbu a2, 5(a0) +; RV32VB-NEXT: lbu a3, 4(a0) +; RV32VB-NEXT: lbu a4, 6(a0) +; RV32VB-NEXT: lbu a5, 7(a0) +; RV32VB-NEXT: slli a2, a2, 8 +; RV32VB-NEXT: or a2, a3, a2 +; RV32VB-NEXT: slli a4, a4, 16 +; RV32VB-NEXT: slli a5, a5, 24 +; RV32VB-NEXT: or a4, a5, a4 +; RV32VB-NEXT: or a2, a2, a4 +; RV32VB-NEXT: lbu a3, 9(a0) +; RV32VB-NEXT: lbu a4, 8(a0) +; RV32VB-NEXT: lbu a5, 10(a0) +; RV32VB-NEXT: lbu a6, 11(a0) +; RV32VB-NEXT: slli a3, a3, 8 +; RV32VB-NEXT: or a3, a4, a3 +; RV32VB-NEXT: slli a5, a5, 16 +; RV32VB-NEXT: slli a6, a6, 24 +; RV32VB-NEXT: or a4, a6, a5 +; RV32VB-NEXT: or a3, a3, a4 +; RV32VB-NEXT: lbu a4, 13(a0) +; RV32VB-NEXT: lbu a5, 12(a0) +; RV32VB-NEXT: lbu a6, 14(a0) +; RV32VB-NEXT: lbu a0, 15(a0) +; RV32VB-NEXT: slli a4, a4, 8 +; RV32VB-NEXT: or a4, a5, a4 +; RV32VB-NEXT: slli a6, a6, 16 +; RV32VB-NEXT: slli a0, a0, 
24 +; RV32VB-NEXT: or a0, a0, a6 +; RV32VB-NEXT: or a0, a4, a0 +; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32VB-NEXT: vmv.v.x v8, a1 +; RV32VB-NEXT: vslide1down.vx v8, v8, a2 +; RV32VB-NEXT: vslide1down.vx v8, v8, a3 +; RV32VB-NEXT: vslide1down.vx v8, v8, a0 +; RV32VB-NEXT: ret ; ; RV64V-ONLY-LABEL: buildvec_v16i8_loads_contigous: ; RV64V-ONLY: # %bb.0: @@ -1268,43 +1317,53 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; ; RVA22U64-LABEL: buildvec_v16i8_loads_contigous: ; RVA22U64: # %bb.0: -; RVA22U64-NEXT: addi a6, a0, 8 -; RVA22U64-NEXT: lbu t6, 1(a0) +; RVA22U64-NEXT: lbu a1, 1(a0) +; RVA22U64-NEXT: lbu a2, 0(a0) ; RVA22U64-NEXT: lbu a3, 2(a0) ; RVA22U64-NEXT: lbu a4, 3(a0) -; RVA22U64-NEXT: lbu a5, 4(a0) -; RVA22U64-NEXT: lbu t5, 5(a0) -; RVA22U64-NEXT: lbu a7, 6(a0) -; RVA22U64-NEXT: lbu t0, 7(a0) -; RVA22U64-NEXT: lbu t1, 9(a0) -; RVA22U64-NEXT: lbu t2, 10(a0) -; RVA22U64-NEXT: lbu t3, 11(a0) -; RVA22U64-NEXT: lbu t4, 12(a0) -; RVA22U64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RVA22U64-NEXT: vlse8.v v8, (a0), zero -; RVA22U64-NEXT: lbu a1, 13(a0) -; RVA22U64-NEXT: lbu a2, 14(a0) +; RVA22U64-NEXT: slli a1, a1, 8 +; RVA22U64-NEXT: or a1, a1, a2 +; RVA22U64-NEXT: slli a3, a3, 16 +; RVA22U64-NEXT: slli a4, a4, 24 +; RVA22U64-NEXT: or a3, a3, a4 +; RVA22U64-NEXT: lbu a2, 4(a0) +; RVA22U64-NEXT: or a1, a1, a3 +; RVA22U64-NEXT: lbu a3, 5(a0) +; RVA22U64-NEXT: lbu a4, 6(a0) +; RVA22U64-NEXT: slli a2, a2, 32 +; RVA22U64-NEXT: lbu a5, 7(a0) +; RVA22U64-NEXT: slli a3, a3, 40 +; RVA22U64-NEXT: or a2, a2, a3 +; RVA22U64-NEXT: slli a4, a4, 48 +; RVA22U64-NEXT: slli a5, a5, 56 +; RVA22U64-NEXT: or a4, a4, a5 +; RVA22U64-NEXT: or a2, a2, a4 +; RVA22U64-NEXT: or a1, a1, a2 +; RVA22U64-NEXT: lbu a2, 9(a0) +; RVA22U64-NEXT: lbu a3, 8(a0) +; RVA22U64-NEXT: lbu a4, 10(a0) +; RVA22U64-NEXT: lbu a5, 11(a0) +; RVA22U64-NEXT: slli a2, a2, 8 +; RVA22U64-NEXT: or a2, a2, a3 +; RVA22U64-NEXT: slli a4, a4, 16 +; RVA22U64-NEXT: slli a5, a5, 24 +; 
RVA22U64-NEXT: or a4, a4, a5 +; RVA22U64-NEXT: lbu a3, 12(a0) +; RVA22U64-NEXT: or a2, a2, a4 +; RVA22U64-NEXT: lbu a4, 13(a0) +; RVA22U64-NEXT: lbu a5, 14(a0) +; RVA22U64-NEXT: slli a3, a3, 32 ; RVA22U64-NEXT: lbu a0, 15(a0) -; RVA22U64-NEXT: vslide1down.vx v8, v8, t6 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a3 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a4 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a5 -; RVA22U64-NEXT: vlse8.v v9, (a6), zero -; RVA22U64-NEXT: vslide1down.vx v8, v8, t5 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a7 -; RVA22U64-NEXT: vslide1down.vx v10, v8, t0 -; RVA22U64-NEXT: vslide1down.vx v8, v9, t1 -; RVA22U64-NEXT: vslide1down.vx v8, v8, t2 -; RVA22U64-NEXT: vslide1down.vx v8, v8, t3 -; RVA22U64-NEXT: vslide1down.vx v8, v8, t4 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a1 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a2 -; RVA22U64-NEXT: li a1, 255 -; RVA22U64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RVA22U64-NEXT: vmv.s.x v0, a1 -; RVA22U64-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RVA22U64-NEXT: slli a4, a4, 40 +; RVA22U64-NEXT: or a3, a3, a4 +; RVA22U64-NEXT: slli a5, a5, 48 +; RVA22U64-NEXT: slli a0, a0, 56 +; RVA22U64-NEXT: or a0, a0, a5 +; RVA22U64-NEXT: or a0, a0, a3 +; RVA22U64-NEXT: or a0, a0, a2 +; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RVA22U64-NEXT: vmv.v.x v8, a1 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0 -; RVA22U64-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RVA22U64-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_v16i8_loads_contigous: @@ -1401,46 +1460,95 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { -; RV32-LABEL: buildvec_v16i8_loads_gather: -; RV32: # %bb.0: -; RV32-NEXT: addi a1, a0, 82 -; RV32-NEXT: lbu a2, 1(a0) -; RV32-NEXT: lbu a3, 22(a0) -; RV32-NEXT: lbu a4, 31(a0) -; RV32-NEXT: lbu a5, 44(a0) -; RV32-NEXT: lbu a6, 55(a0) -; RV32-NEXT: lbu a7, 623(a0) -; RV32-NEXT: lbu t0, 75(a0) -; RV32-NEXT: lbu t1, 93(a0) -; RV32-NEXT: lbu t2, 105(a0) -; RV32-NEXT: 
lbu t3, 161(a0) -; RV32-NEXT: lbu t4, 124(a0) -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vlse8.v v8, (a0), zero -; RV32-NEXT: lbu t5, 163(a0) -; RV32-NEXT: lbu t6, 144(a0) -; RV32-NEXT: lbu a0, 154(a0) -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a3 -; RV32-NEXT: vslide1down.vx v8, v8, a4 -; RV32-NEXT: vslide1down.vx v8, v8, a5 -; RV32-NEXT: vlse8.v v9, (a1), zero -; RV32-NEXT: vslide1down.vx v8, v8, a6 -; RV32-NEXT: vslide1down.vx v8, v8, a7 -; RV32-NEXT: vslide1down.vx v10, v8, t0 -; RV32-NEXT: vslide1down.vx v8, v9, t1 -; RV32-NEXT: vslide1down.vx v8, v8, t2 -; RV32-NEXT: vslide1down.vx v8, v8, t3 -; RV32-NEXT: vslide1down.vx v8, v8, t4 -; RV32-NEXT: vslide1down.vx v8, v8, t5 -; RV32-NEXT: vslide1down.vx v8, v8, t6 -; RV32-NEXT: li a1, 255 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: vslidedown.vi v8, v10, 8, v0.t -; RV32-NEXT: ret +; RV32-ONLY-LABEL: buildvec_v16i8_loads_gather: +; RV32-ONLY: # %bb.0: +; RV32-ONLY-NEXT: addi a1, a0, 82 +; RV32-ONLY-NEXT: lbu a2, 1(a0) +; RV32-ONLY-NEXT: lbu a3, 22(a0) +; RV32-ONLY-NEXT: lbu a4, 31(a0) +; RV32-ONLY-NEXT: lbu a5, 44(a0) +; RV32-ONLY-NEXT: lbu a6, 55(a0) +; RV32-ONLY-NEXT: lbu a7, 623(a0) +; RV32-ONLY-NEXT: lbu t0, 75(a0) +; RV32-ONLY-NEXT: lbu t1, 93(a0) +; RV32-ONLY-NEXT: lbu t2, 105(a0) +; RV32-ONLY-NEXT: lbu t3, 161(a0) +; RV32-ONLY-NEXT: lbu t4, 124(a0) +; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-ONLY-NEXT: vlse8.v v8, (a0), zero +; RV32-ONLY-NEXT: lbu t5, 163(a0) +; RV32-ONLY-NEXT: lbu t6, 144(a0) +; RV32-ONLY-NEXT: lbu a0, 154(a0) +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5 +; RV32-ONLY-NEXT: vlse8.v v9, (a1), zero +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6 +; 
RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7 +; RV32-ONLY-NEXT: vslide1down.vx v10, v8, t0 +; RV32-ONLY-NEXT: vslide1down.vx v8, v9, t1 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t2 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t3 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t4 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t5 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t6 +; RV32-ONLY-NEXT: li a1, 255 +; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-ONLY-NEXT: vmv.s.x v0, a1 +; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0 +; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t +; RV32-ONLY-NEXT: ret +; +; RV32VB-LABEL: buildvec_v16i8_loads_gather: +; RV32VB: # %bb.0: +; RV32VB-NEXT: lbu a1, 1(a0) +; RV32VB-NEXT: lbu a2, 0(a0) +; RV32VB-NEXT: lbu a3, 22(a0) +; RV32VB-NEXT: lbu a4, 31(a0) +; RV32VB-NEXT: slli a1, a1, 8 +; RV32VB-NEXT: or a1, a2, a1 +; RV32VB-NEXT: slli a3, a3, 16 +; RV32VB-NEXT: slli a4, a4, 24 +; RV32VB-NEXT: or a3, a4, a3 +; RV32VB-NEXT: or a1, a1, a3 +; RV32VB-NEXT: lbu a2, 55(a0) +; RV32VB-NEXT: lbu a3, 44(a0) +; RV32VB-NEXT: lbu a4, 623(a0) +; RV32VB-NEXT: lbu a5, 75(a0) +; RV32VB-NEXT: slli a2, a2, 8 +; RV32VB-NEXT: or a2, a3, a2 +; RV32VB-NEXT: slli a4, a4, 16 +; RV32VB-NEXT: slli a5, a5, 24 +; RV32VB-NEXT: or a4, a5, a4 +; RV32VB-NEXT: or a2, a2, a4 +; RV32VB-NEXT: lbu a3, 93(a0) +; RV32VB-NEXT: lbu a4, 82(a0) +; RV32VB-NEXT: lbu a5, 105(a0) +; RV32VB-NEXT: lbu a6, 161(a0) +; RV32VB-NEXT: slli a3, a3, 8 +; RV32VB-NEXT: or a3, a4, a3 +; RV32VB-NEXT: slli a5, a5, 16 +; RV32VB-NEXT: slli a6, a6, 24 +; RV32VB-NEXT: or a4, a6, a5 +; RV32VB-NEXT: or a3, a3, a4 +; RV32VB-NEXT: lbu a4, 163(a0) +; RV32VB-NEXT: lbu a5, 124(a0) +; RV32VB-NEXT: lbu a6, 144(a0) +; RV32VB-NEXT: lbu a0, 154(a0) +; RV32VB-NEXT: slli a4, a4, 8 +; RV32VB-NEXT: or a4, a5, a4 +; RV32VB-NEXT: slli a6, a6, 16 +; RV32VB-NEXT: slli a0, a0, 24 +; RV32VB-NEXT: or a0, a0, a6 +; RV32VB-NEXT: or a0, a4, a0 +; RV32VB-NEXT: vsetivli zero, 4, e32, 
m1, ta, ma +; RV32VB-NEXT: vmv.v.x v8, a1 +; RV32VB-NEXT: vslide1down.vx v8, v8, a2 +; RV32VB-NEXT: vslide1down.vx v8, v8, a3 +; RV32VB-NEXT: vslide1down.vx v8, v8, a0 +; RV32VB-NEXT: ret ; ; RV64V-ONLY-LABEL: buildvec_v16i8_loads_gather: ; RV64V-ONLY: # %bb.0: @@ -1485,43 +1593,53 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; ; RVA22U64-LABEL: buildvec_v16i8_loads_gather: ; RVA22U64: # %bb.0: -; RVA22U64-NEXT: addi a6, a0, 82 -; RVA22U64-NEXT: lbu t6, 1(a0) +; RVA22U64-NEXT: lbu a1, 1(a0) +; RVA22U64-NEXT: lbu a2, 0(a0) ; RVA22U64-NEXT: lbu a3, 22(a0) ; RVA22U64-NEXT: lbu a4, 31(a0) -; RVA22U64-NEXT: lbu a5, 44(a0) -; RVA22U64-NEXT: lbu t5, 55(a0) -; RVA22U64-NEXT: lbu a7, 623(a0) -; RVA22U64-NEXT: lbu t0, 75(a0) -; RVA22U64-NEXT: lbu t1, 93(a0) -; RVA22U64-NEXT: lbu t2, 105(a0) -; RVA22U64-NEXT: lbu t3, 161(a0) -; RVA22U64-NEXT: lbu t4, 124(a0) -; RVA22U64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RVA22U64-NEXT: vlse8.v v8, (a0), zero -; RVA22U64-NEXT: lbu a1, 163(a0) -; RVA22U64-NEXT: lbu a2, 144(a0) +; RVA22U64-NEXT: slli a1, a1, 8 +; RVA22U64-NEXT: or a1, a1, a2 +; RVA22U64-NEXT: slli a3, a3, 16 +; RVA22U64-NEXT: slli a4, a4, 24 +; RVA22U64-NEXT: or a3, a3, a4 +; RVA22U64-NEXT: lbu a2, 44(a0) +; RVA22U64-NEXT: or a1, a1, a3 +; RVA22U64-NEXT: lbu a3, 55(a0) +; RVA22U64-NEXT: lbu a4, 623(a0) +; RVA22U64-NEXT: slli a2, a2, 32 +; RVA22U64-NEXT: lbu a5, 75(a0) +; RVA22U64-NEXT: slli a3, a3, 40 +; RVA22U64-NEXT: or a2, a2, a3 +; RVA22U64-NEXT: slli a4, a4, 48 +; RVA22U64-NEXT: slli a5, a5, 56 +; RVA22U64-NEXT: or a4, a4, a5 +; RVA22U64-NEXT: or a2, a2, a4 +; RVA22U64-NEXT: or a1, a1, a2 +; RVA22U64-NEXT: lbu a2, 93(a0) +; RVA22U64-NEXT: lbu a3, 82(a0) +; RVA22U64-NEXT: lbu a4, 105(a0) +; RVA22U64-NEXT: lbu a5, 161(a0) +; RVA22U64-NEXT: slli a2, a2, 8 +; RVA22U64-NEXT: or a2, a2, a3 +; RVA22U64-NEXT: slli a4, a4, 16 +; RVA22U64-NEXT: slli a5, a5, 24 +; RVA22U64-NEXT: or a4, a4, a5 +; RVA22U64-NEXT: lbu a3, 124(a0) +; RVA22U64-NEXT: or a2, a2, a4 +; 
RVA22U64-NEXT: lbu a4, 163(a0) +; RVA22U64-NEXT: lbu a5, 144(a0) +; RVA22U64-NEXT: slli a3, a3, 32 ; RVA22U64-NEXT: lbu a0, 154(a0) -; RVA22U64-NEXT: vslide1down.vx v8, v8, t6 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a3 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a4 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a5 -; RVA22U64-NEXT: vlse8.v v9, (a6), zero -; RVA22U64-NEXT: vslide1down.vx v8, v8, t5 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a7 -; RVA22U64-NEXT: vslide1down.vx v10, v8, t0 -; RVA22U64-NEXT: vslide1down.vx v8, v9, t1 -; RVA22U64-NEXT: vslide1down.vx v8, v8, t2 -; RVA22U64-NEXT: vslide1down.vx v8, v8, t3 -; RVA22U64-NEXT: vslide1down.vx v8, v8, t4 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a1 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a2 -; RVA22U64-NEXT: li a1, 255 -; RVA22U64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RVA22U64-NEXT: vmv.s.x v0, a1 -; RVA22U64-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RVA22U64-NEXT: slli a4, a4, 40 +; RVA22U64-NEXT: or a3, a3, a4 +; RVA22U64-NEXT: slli a5, a5, 48 +; RVA22U64-NEXT: slli a0, a0, 56 +; RVA22U64-NEXT: or a0, a0, a5 +; RVA22U64-NEXT: or a0, a0, a3 +; RVA22U64-NEXT: or a0, a0, a2 +; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RVA22U64-NEXT: vmv.v.x v8, a1 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0 -; RVA22U64-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RVA22U64-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_v16i8_loads_gather: @@ -1617,26 +1735,55 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { } define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) { -; RV32-LABEL: buildvec_v16i8_undef_low_half: -; RV32: # %bb.0: -; RV32-NEXT: addi a1, a0, 82 -; RV32-NEXT: lbu a2, 93(a0) -; RV32-NEXT: lbu a3, 105(a0) -; RV32-NEXT: lbu a4, 161(a0) -; RV32-NEXT: lbu a5, 124(a0) -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vlse8.v v8, (a1), zero -; RV32-NEXT: lbu a1, 163(a0) -; RV32-NEXT: lbu a6, 144(a0) -; RV32-NEXT: lbu a0, 154(a0) -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslide1down.vx v8, v8, 
a3 -; RV32-NEXT: vslide1down.vx v8, v8, a4 -; RV32-NEXT: vslide1down.vx v8, v8, a5 -; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslide1down.vx v8, v8, a6 -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: ret +; RV32-ONLY-LABEL: buildvec_v16i8_undef_low_half: +; RV32-ONLY: # %bb.0: +; RV32-ONLY-NEXT: addi a1, a0, 82 +; RV32-ONLY-NEXT: lbu a2, 93(a0) +; RV32-ONLY-NEXT: lbu a3, 105(a0) +; RV32-ONLY-NEXT: lbu a4, 161(a0) +; RV32-ONLY-NEXT: lbu a5, 124(a0) +; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-ONLY-NEXT: vlse8.v v8, (a1), zero +; RV32-ONLY-NEXT: lbu a1, 163(a0) +; RV32-ONLY-NEXT: lbu a6, 144(a0) +; RV32-ONLY-NEXT: lbu a0, 154(a0) +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0 +; RV32-ONLY-NEXT: ret +; +; RV32VB-LABEL: buildvec_v16i8_undef_low_half: +; RV32VB: # %bb.0: +; RV32VB-NEXT: lbu a1, 93(a0) +; RV32VB-NEXT: lbu a2, 82(a0) +; RV32VB-NEXT: lbu a3, 105(a0) +; RV32VB-NEXT: lbu a4, 161(a0) +; RV32VB-NEXT: slli a1, a1, 8 +; RV32VB-NEXT: or a1, a2, a1 +; RV32VB-NEXT: slli a3, a3, 16 +; RV32VB-NEXT: slli a4, a4, 24 +; RV32VB-NEXT: or a3, a4, a3 +; RV32VB-NEXT: or a1, a1, a3 +; RV32VB-NEXT: lbu a2, 163(a0) +; RV32VB-NEXT: lbu a3, 124(a0) +; RV32VB-NEXT: lbu a4, 144(a0) +; RV32VB-NEXT: lbu a0, 154(a0) +; RV32VB-NEXT: slli a2, a2, 8 +; RV32VB-NEXT: or a2, a3, a2 +; RV32VB-NEXT: slli a4, a4, 16 +; RV32VB-NEXT: slli a0, a0, 24 +; RV32VB-NEXT: or a0, a0, a4 +; RV32VB-NEXT: or a0, a2, a0 +; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32VB-NEXT: vmv.v.i v8, 0 +; RV32VB-NEXT: vslide1down.vx v8, v8, zero +; RV32VB-NEXT: vslide1down.vx v8, v8, a1 +; RV32VB-NEXT: vslide1down.vx v8, v8, a0 +; RV32VB-NEXT: ret ; ; RV64V-ONLY-LABEL: buildvec_v16i8_undef_low_half: ; RV64V-ONLY: # 
%bb.0: @@ -1661,22 +1808,30 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) { ; ; RVA22U64-LABEL: buildvec_v16i8_undef_low_half: ; RVA22U64: # %bb.0: -; RVA22U64-NEXT: addi a1, a0, 82 -; RVA22U64-NEXT: lbu a6, 93(a0) +; RVA22U64-NEXT: lbu a1, 93(a0) +; RVA22U64-NEXT: lbu a2, 82(a0) ; RVA22U64-NEXT: lbu a3, 105(a0) ; RVA22U64-NEXT: lbu a4, 161(a0) -; RVA22U64-NEXT: lbu a5, 124(a0) -; RVA22U64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RVA22U64-NEXT: vlse8.v v8, (a1), zero -; RVA22U64-NEXT: lbu a1, 163(a0) -; RVA22U64-NEXT: lbu a2, 144(a0) +; RVA22U64-NEXT: slli a1, a1, 8 +; RVA22U64-NEXT: or a1, a1, a2 +; RVA22U64-NEXT: slli a3, a3, 16 +; RVA22U64-NEXT: slli a4, a4, 24 +; RVA22U64-NEXT: or a3, a3, a4 +; RVA22U64-NEXT: lbu a2, 124(a0) +; RVA22U64-NEXT: or a1, a1, a3 +; RVA22U64-NEXT: lbu a3, 163(a0) +; RVA22U64-NEXT: lbu a4, 144(a0) +; RVA22U64-NEXT: slli a2, a2, 32 ; RVA22U64-NEXT: lbu a0, 154(a0) -; RVA22U64-NEXT: vslide1down.vx v8, v8, a6 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a3 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a4 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a5 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a1 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a2 +; RVA22U64-NEXT: slli a3, a3, 40 +; RVA22U64-NEXT: or a2, a2, a3 +; RVA22U64-NEXT: slli a4, a4, 48 +; RVA22U64-NEXT: slli a0, a0, 56 +; RVA22U64-NEXT: or a0, a0, a4 +; RVA22U64-NEXT: or a0, a0, a2 +; RVA22U64-NEXT: or a0, a0, a1 +; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RVA22U64-NEXT: vmv.v.i v8, 0 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-NEXT: ret ; @@ -1730,26 +1885,55 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) { } define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) { -; RV32-LABEL: buildvec_v16i8_undef_high_half: -; RV32: # %bb.0: -; RV32-NEXT: lbu a1, 1(a0) -; RV32-NEXT: lbu a2, 22(a0) -; RV32-NEXT: lbu a3, 31(a0) -; RV32-NEXT: lbu a4, 44(a0) -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vlse8.v v8, (a0), zero -; RV32-NEXT: lbu a5, 
55(a0) -; RV32-NEXT: lbu a6, 623(a0) -; RV32-NEXT: lbu a0, 75(a0) -; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a3 -; RV32-NEXT: vslide1down.vx v8, v8, a4 -; RV32-NEXT: vslide1down.vx v8, v8, a5 -; RV32-NEXT: vslide1down.vx v8, v8, a6 -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: vslidedown.vi v8, v8, 8 -; RV32-NEXT: ret +; RV32-ONLY-LABEL: buildvec_v16i8_undef_high_half: +; RV32-ONLY: # %bb.0: +; RV32-ONLY-NEXT: lbu a1, 1(a0) +; RV32-ONLY-NEXT: lbu a2, 22(a0) +; RV32-ONLY-NEXT: lbu a3, 31(a0) +; RV32-ONLY-NEXT: lbu a4, 44(a0) +; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-ONLY-NEXT: vlse8.v v8, (a0), zero +; RV32-ONLY-NEXT: lbu a5, 55(a0) +; RV32-ONLY-NEXT: lbu a6, 623(a0) +; RV32-ONLY-NEXT: lbu a0, 75(a0) +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0 +; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 8 +; RV32-ONLY-NEXT: ret +; +; RV32VB-LABEL: buildvec_v16i8_undef_high_half: +; RV32VB: # %bb.0: +; RV32VB-NEXT: lbu a1, 1(a0) +; RV32VB-NEXT: lbu a2, 0(a0) +; RV32VB-NEXT: lbu a3, 22(a0) +; RV32VB-NEXT: lbu a4, 31(a0) +; RV32VB-NEXT: slli a1, a1, 8 +; RV32VB-NEXT: or a1, a2, a1 +; RV32VB-NEXT: slli a3, a3, 16 +; RV32VB-NEXT: slli a4, a4, 24 +; RV32VB-NEXT: or a3, a4, a3 +; RV32VB-NEXT: or a1, a1, a3 +; RV32VB-NEXT: lbu a2, 55(a0) +; RV32VB-NEXT: lbu a3, 44(a0) +; RV32VB-NEXT: lbu a4, 623(a0) +; RV32VB-NEXT: lbu a0, 75(a0) +; RV32VB-NEXT: slli a2, a2, 8 +; RV32VB-NEXT: or a2, a3, a2 +; RV32VB-NEXT: slli a4, a4, 16 +; RV32VB-NEXT: slli a0, a0, 24 +; RV32VB-NEXT: or a0, a0, a4 +; RV32VB-NEXT: or a0, a2, a0 +; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32VB-NEXT: vmv.v.x v8, a1 +; RV32VB-NEXT: vslide1down.vx v8, v8, 
a0 +; RV32VB-NEXT: vslide1down.vx v8, v8, zero +; RV32VB-NEXT: vslide1down.vx v8, v8, zero +; RV32VB-NEXT: ret ; ; RV64V-ONLY-LABEL: buildvec_v16i8_undef_high_half: ; RV64V-ONLY: # %bb.0: @@ -1774,23 +1958,31 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) { ; ; RVA22U64-LABEL: buildvec_v16i8_undef_high_half: ; RVA22U64: # %bb.0: -; RVA22U64-NEXT: lbu a6, 1(a0) -; RVA22U64-NEXT: lbu a2, 22(a0) -; RVA22U64-NEXT: lbu a3, 31(a0) -; RVA22U64-NEXT: lbu a4, 44(a0) -; RVA22U64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RVA22U64-NEXT: vlse8.v v8, (a0), zero -; RVA22U64-NEXT: lbu a5, 55(a0) -; RVA22U64-NEXT: lbu a1, 623(a0) +; RVA22U64-NEXT: lbu a1, 1(a0) +; RVA22U64-NEXT: lbu a2, 0(a0) +; RVA22U64-NEXT: lbu a3, 22(a0) +; RVA22U64-NEXT: lbu a4, 31(a0) +; RVA22U64-NEXT: slli a1, a1, 8 +; RVA22U64-NEXT: or a1, a1, a2 +; RVA22U64-NEXT: slli a3, a3, 16 +; RVA22U64-NEXT: slli a4, a4, 24 +; RVA22U64-NEXT: or a3, a3, a4 +; RVA22U64-NEXT: lbu a2, 44(a0) +; RVA22U64-NEXT: or a1, a1, a3 +; RVA22U64-NEXT: lbu a3, 55(a0) +; RVA22U64-NEXT: lbu a4, 623(a0) +; RVA22U64-NEXT: slli a2, a2, 32 ; RVA22U64-NEXT: lbu a0, 75(a0) -; RVA22U64-NEXT: vslide1down.vx v8, v8, a6 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a2 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a3 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a4 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a5 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a1 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a0 -; RVA22U64-NEXT: vslidedown.vi v8, v8, 8 +; RVA22U64-NEXT: slli a3, a3, 40 +; RVA22U64-NEXT: or a2, a2, a3 +; RVA22U64-NEXT: slli a4, a4, 48 +; RVA22U64-NEXT: slli a0, a0, 56 +; RVA22U64-NEXT: or a0, a0, a4 +; RVA22U64-NEXT: or a0, a0, a2 +; RVA22U64-NEXT: or a0, a0, a1 +; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RVA22U64-NEXT: vmv.v.x v8, a0 +; RVA22U64-NEXT: vslide1down.vx v8, v8, zero ; RVA22U64-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_v16i8_undef_high_half: @@ -1842,34 +2034,65 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) { } 
define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { -; RV32-LABEL: buildvec_v16i8_undef_edges: -; RV32: # %bb.0: -; RV32-NEXT: addi a1, a0, 31 -; RV32-NEXT: addi a2, a0, 82 -; RV32-NEXT: lbu a3, 44(a0) -; RV32-NEXT: lbu a4, 55(a0) -; RV32-NEXT: lbu a5, 623(a0) -; RV32-NEXT: lbu a6, 75(a0) -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vlse8.v v8, (a1), zero -; RV32-NEXT: lbu a1, 93(a0) -; RV32-NEXT: lbu a7, 105(a0) -; RV32-NEXT: lbu a0, 161(a0) -; RV32-NEXT: vslide1down.vx v8, v8, a3 -; RV32-NEXT: vlse8.v v9, (a2), zero -; RV32-NEXT: vslide1down.vx v8, v8, a4 -; RV32-NEXT: vslide1down.vx v8, v8, a5 -; RV32-NEXT: vslide1down.vx v10, v8, a6 -; RV32-NEXT: vslide1down.vx v8, v9, a1 -; RV32-NEXT: vslide1down.vx v8, v8, a7 -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: li a0, 255 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV32-NEXT: vslidedown.vi v8, v8, 4 -; RV32-NEXT: vslidedown.vi v8, v10, 8, v0.t -; RV32-NEXT: ret +; RV32-ONLY-LABEL: buildvec_v16i8_undef_edges: +; RV32-ONLY: # %bb.0: +; RV32-ONLY-NEXT: addi a1, a0, 31 +; RV32-ONLY-NEXT: addi a2, a0, 82 +; RV32-ONLY-NEXT: lbu a3, 44(a0) +; RV32-ONLY-NEXT: lbu a4, 55(a0) +; RV32-ONLY-NEXT: lbu a5, 623(a0) +; RV32-ONLY-NEXT: lbu a6, 75(a0) +; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-ONLY-NEXT: vlse8.v v8, (a1), zero +; RV32-ONLY-NEXT: lbu a1, 93(a0) +; RV32-ONLY-NEXT: lbu a7, 105(a0) +; RV32-ONLY-NEXT: lbu a0, 161(a0) +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 +; RV32-ONLY-NEXT: vlse8.v v9, (a2), zero +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5 +; RV32-ONLY-NEXT: vslide1down.vx v10, v8, a6 +; RV32-ONLY-NEXT: vslide1down.vx v8, v9, a1 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0 +; RV32-ONLY-NEXT: li a0, 255 +; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-ONLY-NEXT: vmv.s.x v0, a0 +; 
RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 4 +; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t +; RV32-ONLY-NEXT: ret +; +; RV32VB-LABEL: buildvec_v16i8_undef_edges: +; RV32VB: # %bb.0: +; RV32VB-NEXT: lbu a1, 31(a0) +; RV32VB-NEXT: lbu a2, 55(a0) +; RV32VB-NEXT: lbu a3, 44(a0) +; RV32VB-NEXT: lbu a4, 623(a0) +; RV32VB-NEXT: lbu a5, 75(a0) +; RV32VB-NEXT: slli a2, a2, 8 +; RV32VB-NEXT: or a2, a3, a2 +; RV32VB-NEXT: slli a4, a4, 16 +; RV32VB-NEXT: slli a5, a5, 24 +; RV32VB-NEXT: lbu a3, 93(a0) +; RV32VB-NEXT: or a4, a5, a4 +; RV32VB-NEXT: or a2, a2, a4 +; RV32VB-NEXT: lbu a4, 82(a0) +; RV32VB-NEXT: slli a3, a3, 8 +; RV32VB-NEXT: lbu a5, 105(a0) +; RV32VB-NEXT: lbu a0, 161(a0) +; RV32VB-NEXT: or a3, a4, a3 +; RV32VB-NEXT: slli a1, a1, 24 +; RV32VB-NEXT: slli a5, a5, 16 +; RV32VB-NEXT: slli a0, a0, 24 +; RV32VB-NEXT: or a0, a0, a5 +; RV32VB-NEXT: or a0, a3, a0 +; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32VB-NEXT: vmv.v.x v8, a1 +; RV32VB-NEXT: vslide1down.vx v8, v8, a2 +; RV32VB-NEXT: vslide1down.vx v8, v8, a0 +; RV32VB-NEXT: vslide1down.vx v8, v8, zero +; RV32VB-NEXT: ret ; ; RV64V-ONLY-LABEL: buildvec_v16i8_undef_edges: ; RV64V-ONLY: # %bb.0: @@ -1902,31 +2125,33 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; ; RVA22U64-LABEL: buildvec_v16i8_undef_edges: ; RVA22U64: # %bb.0: -; RVA22U64-NEXT: addi a1, a0, 31 -; RVA22U64-NEXT: addi a6, a0, 82 -; RVA22U64-NEXT: lbu a3, 44(a0) -; RVA22U64-NEXT: lbu a4, 55(a0) -; RVA22U64-NEXT: lbu a5, 623(a0) -; RVA22U64-NEXT: lbu a7, 75(a0) -; RVA22U64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RVA22U64-NEXT: vlse8.v v8, (a1), zero -; RVA22U64-NEXT: lbu a1, 93(a0) -; RVA22U64-NEXT: lbu a2, 105(a0) +; RVA22U64-NEXT: lbu a1, 44(a0) +; RVA22U64-NEXT: lbu a2, 55(a0) +; RVA22U64-NEXT: lbu a3, 31(a0) +; RVA22U64-NEXT: lbu a4, 623(a0) +; RVA22U64-NEXT: slli a1, a1, 32 +; RVA22U64-NEXT: slli a2, a2, 40 +; RVA22U64-NEXT: lbu a5, 75(a0) +; RVA22U64-NEXT: or a1, a1, a2 +; 
RVA22U64-NEXT: slli a3, a3, 24 +; RVA22U64-NEXT: slli a4, a4, 48 +; RVA22U64-NEXT: slli a5, a5, 56 +; RVA22U64-NEXT: or a4, a4, a5 +; RVA22U64-NEXT: or a1, a1, a4 +; RVA22U64-NEXT: add.uw a1, a3, a1 +; RVA22U64-NEXT: lbu a2, 93(a0) +; RVA22U64-NEXT: lbu a3, 82(a0) +; RVA22U64-NEXT: lbu a4, 105(a0) ; RVA22U64-NEXT: lbu a0, 161(a0) -; RVA22U64-NEXT: vslide1down.vx v8, v8, a3 -; RVA22U64-NEXT: vlse8.v v9, (a6), zero -; RVA22U64-NEXT: vslide1down.vx v8, v8, a4 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a5 -; RVA22U64-NEXT: vslide1down.vx v10, v8, a7 -; RVA22U64-NEXT: vslide1down.vx v8, v9, a1 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a2 +; RVA22U64-NEXT: slli a2, a2, 8 +; RVA22U64-NEXT: or a2, a2, a3 +; RVA22U64-NEXT: slli a4, a4, 16 +; RVA22U64-NEXT: slli a0, a0, 24 +; RVA22U64-NEXT: or a0, a0, a4 +; RVA22U64-NEXT: or a0, a0, a2 +; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RVA22U64-NEXT: vmv.v.x v8, a1 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0 -; RVA22U64-NEXT: li a0, 255 -; RVA22U64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RVA22U64-NEXT: vmv.s.x v0, a0 -; RVA22U64-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RVA22U64-NEXT: vslidedown.vi v8, v8, 4 -; RVA22U64-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RVA22U64-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_v16i8_undef_edges: @@ -1990,38 +2215,69 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { } define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { -; RV32-LABEL: buildvec_v16i8_loads_undef_scattered: -; RV32: # %bb.0: -; RV32-NEXT: addi a1, a0, 82 -; RV32-NEXT: lbu a2, 1(a0) -; RV32-NEXT: lbu a3, 44(a0) -; RV32-NEXT: lbu a4, 55(a0) -; RV32-NEXT: lbu a5, 75(a0) -; RV32-NEXT: lbu a6, 93(a0) -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vlse8.v v8, (a0), zero -; RV32-NEXT: lbu a7, 124(a0) -; RV32-NEXT: lbu t0, 144(a0) -; RV32-NEXT: lbu a0, 154(a0) -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslidedown.vi v8, v8, 2 -; RV32-NEXT: vslide1down.vx v8, v8, a3 -; RV32-NEXT: vlse8.v 
v9, (a1), zero -; RV32-NEXT: vslide1down.vx v8, v8, a4 -; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: vslide1down.vx v10, v8, a5 -; RV32-NEXT: vslide1down.vx v8, v9, a6 -; RV32-NEXT: vslidedown.vi v8, v8, 2 -; RV32-NEXT: vslide1down.vx v8, v8, a7 -; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: vslide1down.vx v8, v8, t0 -; RV32-NEXT: li a1, 255 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: vslidedown.vi v8, v10, 8, v0.t -; RV32-NEXT: ret +; RV32-ONLY-LABEL: buildvec_v16i8_loads_undef_scattered: +; RV32-ONLY: # %bb.0: +; RV32-ONLY-NEXT: addi a1, a0, 82 +; RV32-ONLY-NEXT: lbu a2, 1(a0) +; RV32-ONLY-NEXT: lbu a3, 44(a0) +; RV32-ONLY-NEXT: lbu a4, 55(a0) +; RV32-ONLY-NEXT: lbu a5, 75(a0) +; RV32-ONLY-NEXT: lbu a6, 93(a0) +; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-ONLY-NEXT: vlse8.v v8, (a0), zero +; RV32-ONLY-NEXT: lbu a7, 124(a0) +; RV32-ONLY-NEXT: lbu t0, 144(a0) +; RV32-ONLY-NEXT: lbu a0, 154(a0) +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 +; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 2 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 +; RV32-ONLY-NEXT: vlse8.v v9, (a1), zero +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4 +; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 1 +; RV32-ONLY-NEXT: vslide1down.vx v10, v8, a5 +; RV32-ONLY-NEXT: vslide1down.vx v8, v9, a6 +; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 2 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7 +; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 1 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t0 +; RV32-ONLY-NEXT: li a1, 255 +; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-ONLY-NEXT: vmv.s.x v0, a1 +; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0 +; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t +; RV32-ONLY-NEXT: ret +; +; RV32VB-LABEL: buildvec_v16i8_loads_undef_scattered: +; RV32VB: # %bb.0: +; RV32VB-NEXT: lbu a1, 1(a0) 
+; RV32VB-NEXT: lbu a2, 0(a0) +; RV32VB-NEXT: slli a1, a1, 8 +; RV32VB-NEXT: lbu a3, 55(a0) +; RV32VB-NEXT: lbu a4, 44(a0) +; RV32VB-NEXT: or a1, a2, a1 +; RV32VB-NEXT: lbu a2, 75(a0) +; RV32VB-NEXT: slli a3, a3, 8 +; RV32VB-NEXT: or a3, a4, a3 +; RV32VB-NEXT: lbu a4, 93(a0) +; RV32VB-NEXT: slli a2, a2, 24 +; RV32VB-NEXT: or a2, a3, a2 +; RV32VB-NEXT: lbu a3, 82(a0) +; RV32VB-NEXT: slli a4, a4, 8 +; RV32VB-NEXT: lbu a5, 144(a0) +; RV32VB-NEXT: lbu a6, 154(a0) +; RV32VB-NEXT: or a3, a3, a4 +; RV32VB-NEXT: lbu a0, 124(a0) +; RV32VB-NEXT: slli a5, a5, 16 +; RV32VB-NEXT: slli a6, a6, 24 +; RV32VB-NEXT: or a4, a6, a5 +; RV32VB-NEXT: or a0, a0, a4 +; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32VB-NEXT: vmv.v.x v8, a1 +; RV32VB-NEXT: vslide1down.vx v8, v8, a2 +; RV32VB-NEXT: vslide1down.vx v8, v8, a3 +; RV32VB-NEXT: vslide1down.vx v8, v8, a0 +; RV32VB-NEXT: ret ; ; RV64V-ONLY-LABEL: buildvec_v16i8_loads_undef_scattered: ; RV64V-ONLY: # %bb.0: @@ -2058,35 +2314,35 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; ; RVA22U64-LABEL: buildvec_v16i8_loads_undef_scattered: ; RVA22U64: # %bb.0: -; RVA22U64-NEXT: addi a6, a0, 82 -; RVA22U64-NEXT: lbu a2, 1(a0) +; RVA22U64-NEXT: lbu a1, 1(a0) +; RVA22U64-NEXT: lbu a2, 0(a0) +; RVA22U64-NEXT: slli a1, a1, 8 ; RVA22U64-NEXT: lbu a3, 44(a0) ; RVA22U64-NEXT: lbu a4, 55(a0) -; RVA22U64-NEXT: lbu t0, 75(a0) -; RVA22U64-NEXT: lbu a7, 93(a0) -; RVA22U64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RVA22U64-NEXT: vlse8.v v8, (a0), zero -; RVA22U64-NEXT: lbu a1, 124(a0) -; RVA22U64-NEXT: lbu a5, 144(a0) -; RVA22U64-NEXT: lbu a0, 154(a0) -; RVA22U64-NEXT: vslide1down.vx v8, v8, a2 -; RVA22U64-NEXT: vslidedown.vi v8, v8, 2 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a3 -; RVA22U64-NEXT: vlse8.v v9, (a6), zero -; RVA22U64-NEXT: vslide1down.vx v8, v8, a4 -; RVA22U64-NEXT: vslidedown.vi v8, v8, 1 -; RVA22U64-NEXT: vslide1down.vx v10, v8, t0 -; RVA22U64-NEXT: vslide1down.vx v8, v9, a7 -; RVA22U64-NEXT: vslidedown.vi v8, 
v8, 2 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a1 -; RVA22U64-NEXT: vslidedown.vi v8, v8, 1 -; RVA22U64-NEXT: vslide1down.vx v8, v8, a5 -; RVA22U64-NEXT: li a1, 255 -; RVA22U64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RVA22U64-NEXT: vmv.s.x v0, a1 -; RVA22U64-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RVA22U64-NEXT: or a1, a1, a2 +; RVA22U64-NEXT: lbu a2, 75(a0) +; RVA22U64-NEXT: slli a3, a3, 32 +; RVA22U64-NEXT: slli a4, a4, 40 +; RVA22U64-NEXT: or a3, a3, a4 +; RVA22U64-NEXT: slli a2, a2, 56 +; RVA22U64-NEXT: lbu a4, 93(a0) +; RVA22U64-NEXT: or a2, a2, a3 +; RVA22U64-NEXT: or a1, a1, a2 +; RVA22U64-NEXT: lbu a2, 82(a0) +; RVA22U64-NEXT: slli a4, a4, 8 +; RVA22U64-NEXT: lbu a3, 144(a0) +; RVA22U64-NEXT: lbu a5, 154(a0) +; RVA22U64-NEXT: or a2, a2, a4 +; RVA22U64-NEXT: lbu a0, 124(a0) +; RVA22U64-NEXT: slli a3, a3, 48 +; RVA22U64-NEXT: slli a5, a5, 56 +; RVA22U64-NEXT: or a3, a3, a5 +; RVA22U64-NEXT: slli a0, a0, 32 +; RVA22U64-NEXT: or a0, a0, a3 +; RVA22U64-NEXT: or a0, a0, a2 +; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RVA22U64-NEXT: vmv.v.x v8, a1 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0 -; RVA22U64-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RVA22U64-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_v16i8_loads_undef_scattered: @@ -2172,6 +2428,315 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15 ret <16 x i8> %v16 } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; RV32-ONLY: {{.*}} -; RV32VB: {{.*}} + +define <8 x i8> @buildvec_v8i8_pack(i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 %e6, i8 %e7, i8 %e8) { +; RV32-ONLY-LABEL: buildvec_v8i8_pack: +; RV32-ONLY: # %bb.0: +; RV32-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV32-ONLY-NEXT: vmv.v.x v8, a0 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 +; RV32-ONLY-NEXT: vslide1down.vx v9, v8, a3 +; RV32-ONLY-NEXT: vmv.v.x v8, a4 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6 +; RV32-ONLY-NEXT: vmv.v.i v0, 15 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7 +; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 4, v0.t +; RV32-ONLY-NEXT: ret +; +; RV32VB-LABEL: buildvec_v8i8_pack: +; RV32VB: # %bb.0: +; RV32VB-NEXT: slli a7, a7, 24 +; RV32VB-NEXT: andi a6, a6, 255 +; RV32VB-NEXT: slli a6, a6, 16 +; RV32VB-NEXT: or a6, a7, a6 +; RV32VB-NEXT: andi a4, a4, 255 +; RV32VB-NEXT: andi a5, a5, 255 +; RV32VB-NEXT: slli a5, a5, 8 +; RV32VB-NEXT: or a4, a4, a5 +; RV32VB-NEXT: or a4, a4, a6 +; RV32VB-NEXT: slli a3, a3, 24 +; RV32VB-NEXT: andi a2, a2, 255 +; RV32VB-NEXT: slli a2, a2, 16 +; RV32VB-NEXT: or a2, a3, a2 +; RV32VB-NEXT: andi a0, a0, 255 +; RV32VB-NEXT: andi a1, a1, 255 +; RV32VB-NEXT: slli a1, a1, 8 +; RV32VB-NEXT: or a0, a0, a1 +; RV32VB-NEXT: or a0, a0, a2 +; RV32VB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32VB-NEXT: vmv.v.x v8, a0 +; RV32VB-NEXT: vslide1down.vx v8, v8, a4 +; RV32VB-NEXT: ret +; +; RV64V-ONLY-LABEL: buildvec_v8i8_pack: +; RV64V-ONLY: # %bb.0: +; RV64V-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV64V-ONLY-NEXT: vmv.v.x v8, a0 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 +; RV64V-ONLY-NEXT: vslide1down.vx v9, v8, a3 +; RV64V-ONLY-NEXT: vmv.v.x v8, a4 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6 +; RV64V-ONLY-NEXT: vmv.v.i v0, 15 +; RV64V-ONLY-NEXT: 
vslide1down.vx v8, v8, a7 +; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 4, v0.t +; RV64V-ONLY-NEXT: ret +; +; RVA22U64-LABEL: buildvec_v8i8_pack: +; RVA22U64: # %bb.0: +; RVA22U64-NEXT: andi a4, a4, 255 +; RVA22U64-NEXT: slli a4, a4, 32 +; RVA22U64-NEXT: andi a5, a5, 255 +; RVA22U64-NEXT: slli a5, a5, 40 +; RVA22U64-NEXT: or a4, a4, a5 +; RVA22U64-NEXT: slli a7, a7, 56 +; RVA22U64-NEXT: andi a5, a6, 255 +; RVA22U64-NEXT: slli a5, a5, 48 +; RVA22U64-NEXT: or a5, a7, a5 +; RVA22U64-NEXT: or a4, a4, a5 +; RVA22U64-NEXT: andi a2, a2, 255 +; RVA22U64-NEXT: slli a2, a2, 16 +; RVA22U64-NEXT: andi a3, a3, 255 +; RVA22U64-NEXT: slli a3, a3, 24 +; RVA22U64-NEXT: or a2, a2, a3 +; RVA22U64-NEXT: andi a0, a0, 255 +; RVA22U64-NEXT: andi a1, a1, 255 +; RVA22U64-NEXT: slli a1, a1, 8 +; RVA22U64-NEXT: or a0, a0, a1 +; RVA22U64-NEXT: or a0, a0, a2 +; RVA22U64-NEXT: or a0, a0, a4 +; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RVA22U64-NEXT: vmv.s.x v8, a0 +; RVA22U64-NEXT: ret +; +; RV64ZVE32-LABEL: buildvec_v8i8_pack: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV64ZVE32-NEXT: vmv.v.x v8, a0 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32-NEXT: vslide1down.vx v9, v8, a3 +; RV64ZVE32-NEXT: vmv.v.x v8, a4 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32-NEXT: vmv.v.i v0, 15 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 4, v0.t +; RV64ZVE32-NEXT: ret + %v1 = insertelement <8 x i8> poison, i8 %e1, i32 0 + %v2 = insertelement <8 x i8> %v1, i8 %e2, i32 1 + %v3 = insertelement <8 x i8> %v2, i8 %e3, i32 2 + %v4 = insertelement <8 x i8> %v3, i8 %e4, i32 3 + %v5 = insertelement <8 x i8> %v4, i8 %e5, i32 4 + %v6 = insertelement <8 x i8> %v5, i8 %e6, i32 5 + %v7 = insertelement <8 x i8> %v6, i8 %e7, i32 6 + %v8 = insertelement <8 x i8> %v7, i8 %e8, i32 7 + ret <8 x i8> %v8 +} + +define <6 x i8> 
@buildvec_v6i8_pack(i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 %e6) { +; RV32-ONLY-LABEL: buildvec_v6i8_pack: +; RV32-ONLY: # %bb.0: +; RV32-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-ONLY-NEXT: vmv.v.x v8, a0 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5 +; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 2 +; RV32-ONLY-NEXT: ret +; +; RV32VB-LABEL: buildvec_v6i8_pack: +; RV32VB: # %bb.0: +; RV32VB-NEXT: slli a3, a3, 24 +; RV32VB-NEXT: andi a2, a2, 255 +; RV32VB-NEXT: slli a2, a2, 16 +; RV32VB-NEXT: or a2, a3, a2 +; RV32VB-NEXT: andi a0, a0, 255 +; RV32VB-NEXT: andi a1, a1, 255 +; RV32VB-NEXT: slli a1, a1, 8 +; RV32VB-NEXT: or a0, a0, a1 +; RV32VB-NEXT: or a0, a0, a2 +; RV32VB-NEXT: andi a1, a4, 255 +; RV32VB-NEXT: andi a2, a5, 255 +; RV32VB-NEXT: slli a2, a2, 8 +; RV32VB-NEXT: or a1, a1, a2 +; RV32VB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32VB-NEXT: vmv.v.x v8, a0 +; RV32VB-NEXT: vslide1down.vx v8, v8, a1 +; RV32VB-NEXT: ret +; +; RV64V-ONLY-LABEL: buildvec_v6i8_pack: +; RV64V-ONLY: # %bb.0: +; RV64V-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64V-ONLY-NEXT: vmv.v.x v8, a0 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5 +; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 2 +; RV64V-ONLY-NEXT: ret +; +; RVA22U64-LABEL: buildvec_v6i8_pack: +; RVA22U64: # %bb.0: +; RVA22U64-NEXT: andi a2, a2, 255 +; RVA22U64-NEXT: slli a2, a2, 16 +; RVA22U64-NEXT: andi a3, a3, 255 +; RVA22U64-NEXT: slli a3, a3, 24 +; RVA22U64-NEXT: or a2, a2, a3 +; RVA22U64-NEXT: andi a0, a0, 255 +; RVA22U64-NEXT: andi a1, a1, 255 +; RVA22U64-NEXT: slli a1, a1, 8 +; RVA22U64-NEXT: or a0, a0, a1 +; RVA22U64-NEXT: or a0, a0, a2 +; RVA22U64-NEXT: 
andi a1, a4, 255 +; RVA22U64-NEXT: slli a1, a1, 32 +; RVA22U64-NEXT: andi a2, a5, 255 +; RVA22U64-NEXT: slli a2, a2, 40 +; RVA22U64-NEXT: or a1, a1, a2 +; RVA22U64-NEXT: or a0, a0, a1 +; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RVA22U64-NEXT: vmv.s.x v8, a0 +; RVA22U64-NEXT: ret +; +; RV64ZVE32-LABEL: buildvec_v6i8_pack: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64ZVE32-NEXT: vmv.v.x v8, a0 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32-NEXT: ret + %v1 = insertelement <6 x i8> poison, i8 %e1, i32 0 + %v2 = insertelement <6 x i8> %v1, i8 %e2, i32 1 + %v3 = insertelement <6 x i8> %v2, i8 %e3, i32 2 + %v4 = insertelement <6 x i8> %v3, i8 %e4, i32 3 + %v5 = insertelement <6 x i8> %v4, i8 %e5, i32 4 + %v6 = insertelement <6 x i8> %v5, i8 %e6, i32 5 + ret <6 x i8> %v6 +} + +define <4 x i16> @buildvec_v4i16_pack(i16 %e1, i16 %e2, i16 %e3, i16 %e4) { +; RV32-ONLY-LABEL: buildvec_v4i16_pack: +; RV32-ONLY: # %bb.0: +; RV32-ONLY-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32-ONLY-NEXT: vmv.v.x v8, a0 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 +; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 +; RV32-ONLY-NEXT: ret +; +; RV32VB-LABEL: buildvec_v4i16_pack: +; RV32VB: # %bb.0: +; RV32VB-NEXT: slli a3, a3, 16 +; RV32VB-NEXT: zext.h a2, a2 +; RV32VB-NEXT: or a2, a2, a3 +; RV32VB-NEXT: slli a1, a1, 16 +; RV32VB-NEXT: zext.h a0, a0 +; RV32VB-NEXT: or a0, a0, a1 +; RV32VB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32VB-NEXT: vmv.v.x v8, a0 +; RV32VB-NEXT: vslide1down.vx v8, v8, a2 +; RV32VB-NEXT: ret +; +; RV64V-ONLY-LABEL: buildvec_v4i16_pack: +; RV64V-ONLY: # %bb.0: +; RV64V-ONLY-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64V-ONLY-NEXT: vmv.v.x v8, a0 +; 
RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3 +; RV64V-ONLY-NEXT: ret +; +; RVA22U64-LABEL: buildvec_v4i16_pack: +; RVA22U64: # %bb.0: +; RVA22U64-NEXT: slli a3, a3, 48 +; RVA22U64-NEXT: zext.h a2, a2 +; RVA22U64-NEXT: slli a2, a2, 32 +; RVA22U64-NEXT: or a2, a2, a3 +; RVA22U64-NEXT: zext.h a0, a0 +; RVA22U64-NEXT: zext.h a1, a1 +; RVA22U64-NEXT: slli a1, a1, 16 +; RVA22U64-NEXT: or a0, a0, a1 +; RVA22U64-NEXT: or a0, a0, a2 +; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RVA22U64-NEXT: vmv.s.x v8, a0 +; RVA22U64-NEXT: ret +; +; RV64ZVE32-LABEL: buildvec_v4i16_pack: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64ZVE32-NEXT: vmv.v.x v8, a0 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32-NEXT: ret + %v1 = insertelement <4 x i16> poison, i16 %e1, i32 0 + %v2 = insertelement <4 x i16> %v1, i16 %e2, i32 1 + %v3 = insertelement <4 x i16> %v2, i16 %e3, i32 2 + %v4 = insertelement <4 x i16> %v3, i16 %e4, i32 3 + ret <4 x i16> %v4 +} + +define <2 x i32> @buildvec_v2i32_pack(i32 %e1, i32 %e2) { +; RV32-LABEL: buildvec_v2i32_pack: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-NEXT: vmv.v.x v8, a0 +; RV32-NEXT: vslide1down.vx v8, v8, a1 +; RV32-NEXT: ret +; +; RV64V-ONLY-LABEL: buildvec_v2i32_pack: +; RV64V-ONLY: # %bb.0: +; RV64V-ONLY-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64V-ONLY-NEXT: vmv.v.x v8, a0 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1 +; RV64V-ONLY-NEXT: ret +; +; RVA22U64-LABEL: buildvec_v2i32_pack: +; RVA22U64: # %bb.0: +; RVA22U64-NEXT: slli a1, a1, 32 +; RVA22U64-NEXT: add.uw a0, a0, a1 +; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RVA22U64-NEXT: vmv.s.x v8, a0 +; RVA22U64-NEXT: ret +; +; RV64ZVE32-LABEL: buildvec_v2i32_pack: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: vsetivli zero, 2, 
e32, m1, ta, ma +; RV64ZVE32-NEXT: vmv.v.x v8, a0 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32-NEXT: ret + %v1 = insertelement <2 x i32> poison, i32 %e1, i32 0 + %v2 = insertelement <2 x i32> %v1, i32 %e2, i32 1 + ret <2 x i32> %v2 +} + +define <1 x i16> @buildvec_v1i16_pack(i16 %e1) { +; CHECK-LABEL: buildvec_v1i16_pack: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: ret + %v1 = insertelement <1 x i16> poison, i16 %e1, i32 0 + ret <1 x i16> %v1 +} + +define <1 x i32> @buildvec_v1i32_pack(i32 %e1) { +; CHECK-LABEL: buildvec_v1i32_pack: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: ret + %v1 = insertelement <1 x i32> poison, i32 %e1, i32 0 + ret <1 x i32> %v1 +} + diff --git a/llvm/test/CodeGen/SystemZ/scalar-ctlz-01.ll b/llvm/test/CodeGen/SystemZ/scalar-ctlz-01.ll index e932210d3e71fb..da687095045ff5 100644 --- a/llvm/test/CodeGen/SystemZ/scalar-ctlz-01.ll +++ b/llvm/test/CodeGen/SystemZ/scalar-ctlz-01.ll @@ -43,10 +43,10 @@ define i32 @f2(i32 %arg) { define i32 @f3(i32 %arg) { ; CHECK-LABEL: f3: ; CHECK: # %bb.0: -; CHECK-NEXT: llgfr %r0, %r2 +; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d +; CHECK-NEXT: sllg %r0, %r2, 32 ; CHECK-NEXT: flogr %r2, %r0 -; CHECK-NEXT: aghi %r2, -32 -; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d +; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2q ; CHECK-NEXT: br %r14 %1 = tail call i32 @llvm.ctlz.i32(i32 %arg, i1 true) ret i32 %1 @@ -69,10 +69,9 @@ define i16 @f5(i16 %arg) { ; CHECK-LABEL: f5: ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d -; CHECK-NEXT: llghr %r0, %r2 -; CHECK-NEXT: flogr %r0, %r0 -; CHECK-NEXT: aghi %r0, -32 -; CHECK-NEXT: ahik %r2, %r0, -16 +; CHECK-NEXT: sllg %r0, %r2, 48 +; CHECK-NEXT: flogr %r2, %r0 +; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2q ; CHECK-NEXT: br %r14 %1 = tail call i16 @llvm.ctlz.i16(i16 %arg, i1 true) 
ret i16 %1 @@ -95,10 +94,9 @@ define i8 @f7(i8 %arg) { ; CHECK-LABEL: f7: ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d -; CHECK-NEXT: llgcr %r0, %r2 -; CHECK-NEXT: flogr %r0, %r0 -; CHECK-NEXT: aghi %r0, -32 -; CHECK-NEXT: ahik %r2, %r0, -24 +; CHECK-NEXT: sllg %r0, %r2, 56 +; CHECK-NEXT: flogr %r2, %r0 +; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2q ; CHECK-NEXT: br %r14 %1 = tail call i8 @llvm.ctlz.i8(i8 %arg, i1 true) ret i8 %1 diff --git a/llvm/test/CodeGen/VE/Scalar/ctlz.ll b/llvm/test/CodeGen/VE/Scalar/ctlz.ll index c8c2b11c5eef61..602b9a86bf0320 100644 --- a/llvm/test/CodeGen/VE/Scalar/ctlz.ll +++ b/llvm/test/CodeGen/VE/Scalar/ctlz.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s declare i128 @llvm.ctlz.i128(i128, i1) @@ -31,9 +32,8 @@ define i64 @func64(i64 %p) { define signext i32 @func32s(i32 signext %p) { ; CHECK-LABEL: func32s: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: sll %s0, %s0, 32 ; CHECK-NEXT: ldz %s0, %s0 -; CHECK-NEXT: lea %s0, -32(, %s0) ; CHECK-NEXT: b.l.t (, %s10) %r = tail call i32 @llvm.ctlz.i32(i32 %p, i1 true) ret i32 %r @@ -42,9 +42,8 @@ define signext i32 @func32s(i32 signext %p) { define zeroext i32 @func32z(i32 zeroext %p) { ; CHECK-LABEL: func32z: ; CHECK: # %bb.0: +; CHECK-NEXT: sll %s0, %s0, 32 ; CHECK-NEXT: ldz %s0, %s0 -; CHECK-NEXT: lea %s0, -32(, %s0) -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: b.l.t (, %s10) %r = tail call i32 @llvm.ctlz.i32(i32 %p, i1 true) ret i32 %r @@ -53,11 +52,8 @@ define zeroext i32 @func32z(i32 zeroext %p) { define signext i16 @func16s(i16 signext %p) { ; CHECK-LABEL: func16s: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (48)0 +; CHECK-NEXT: sll %s0, %s0, 48 ; CHECK-NEXT: ldz %s0, %s0 -; CHECK-NEXT: lea %s0, -32(, %s0) -; CHECK-NEXT: adds.w.sx %s0, -16, %s0 -; CHECK-NEXT: and %s0, %s0, (48)0 ; CHECK-NEXT: 
b.l.t (, %s10) %r = tail call i16 @llvm.ctlz.i16(i16 %p, i1 true) ret i16 %r @@ -66,10 +62,8 @@ define signext i16 @func16s(i16 signext %p) { define zeroext i16 @func16z(i16 zeroext %p) { ; CHECK-LABEL: func16z: ; CHECK: # %bb.0: +; CHECK-NEXT: sll %s0, %s0, 48 ; CHECK-NEXT: ldz %s0, %s0 -; CHECK-NEXT: lea %s0, -32(, %s0) -; CHECK-NEXT: adds.w.sx %s0, -16, %s0 -; CHECK-NEXT: and %s0, %s0, (48)0 ; CHECK-NEXT: b.l.t (, %s10) %r = tail call i16 @llvm.ctlz.i16(i16 %p, i1 true) ret i16 %r @@ -78,11 +72,8 @@ define zeroext i16 @func16z(i16 zeroext %p) { define signext i8 @func8s(i8 signext %p) { ; CHECK-LABEL: func8s: ; CHECK: # %bb.0: -; CHECK-NEXT: and %s0, %s0, (56)0 +; CHECK-NEXT: sll %s0, %s0, 56 ; CHECK-NEXT: ldz %s0, %s0 -; CHECK-NEXT: lea %s0, -32(, %s0) -; CHECK-NEXT: adds.w.sx %s0, -24, %s0 -; CHECK-NEXT: and %s0, %s0, (56)0 ; CHECK-NEXT: b.l.t (, %s10) %r = tail call i8 @llvm.ctlz.i8(i8 %p, i1 true) ret i8 %r @@ -91,10 +82,8 @@ define signext i8 @func8s(i8 signext %p) { define zeroext i8 @func8z(i8 zeroext %p) { ; CHECK-LABEL: func8z: ; CHECK: # %bb.0: +; CHECK-NEXT: sll %s0, %s0, 56 ; CHECK-NEXT: ldz %s0, %s0 -; CHECK-NEXT: lea %s0, -32(, %s0) -; CHECK-NEXT: adds.w.sx %s0, -24, %s0 -; CHECK-NEXT: and %s0, %s0, (56)0 ; CHECK-NEXT: b.l.t (, %s10) %r = tail call i8 @llvm.ctlz.i8(i8 %p, i1 true) ret i8 %r diff --git a/llvm/test/CodeGen/X86/branch-hint.ll b/llvm/test/CodeGen/X86/branch-hint.ll new file mode 100644 index 00000000000000..591fb324e1b7ba --- /dev/null +++ b/llvm/test/CodeGen/X86/branch-hint.ll @@ -0,0 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64 -mattr=+branch-hint -enable-branch-hint | FileCheck %s +; RUN: llc < %s -mtriple=x86_64 -mattr=+branch-hint -enable-branch-hint -branch-hint-probability-threshold=50 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64 -mattr=+branch-hint -enable-branch-hint -branch-hint-probability-threshold=60 
-tail-dup-placement=false | FileCheck --check-prefix=TH60 %s + + +; Design: Add DS segment override prefix for condition branch who has high +; probability to take (which is greater than the probability threshold of +; enabling branch hint). + +define void @p51(i32 %x, ptr %p) { +; CHECK-LABEL: p51: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: ds +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movl %edi, (%rsi) +; CHECK-NEXT: .LBB0_2: # %if.end +; CHECK-NEXT: retq +; +; TH60-LABEL: p51: +; TH60: # %bb.0: # %entry +; TH60-NEXT: testl %edi, %edi +; TH60-NEXT: je .LBB0_2 +; TH60-NEXT: # %bb.1: # %if.then +; TH60-NEXT: movl %edi, (%rsi) +; TH60-NEXT: .LBB0_2: # %if.end +; TH60-NEXT: retq +entry: + %tobool.not = icmp eq i32 %x, 0 + br i1 %tobool.not, label %if.end, label %if.then, !prof !0 + +if.then: + store i32 %x, ptr %p, align 4 + br label %if.end + +if.end: + ret void +} + +define void @p61(i32 %x, ptr %p) { +; CHECK-LABEL: p61: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jne .LBB1_1 +; CHECK-NEXT: # %bb.2: # %if.end +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB1_1: # %if.then +; CHECK-NEXT: movl %edi, (%rsi) +; CHECK-NEXT: retq +; +; TH60-LABEL: p61: +; TH60: # %bb.0: # %entry +; TH60-NEXT: testl %edi, %edi +; TH60-NEXT: ds +; TH60-NEXT: je .LBB1_2 +; TH60-NEXT: # %bb.1: # %if.then +; TH60-NEXT: movl %edi, (%rsi) +; TH60-NEXT: .LBB1_2: # %if.end +; TH60-NEXT: retq +entry: + %tobool.not = icmp eq i32 %x, 0 + br i1 %tobool.not, label %if.end, label %if.then, !prof !1 + +if.then: + store i32 %x, ptr %p, align 4 + br label %if.end + +if.end: + ret void +} + +!0 = !{!"branch_weights", i32 51, i32 49} +!1 = !{!"branch_weights", i32 61, i32 39} \ No newline at end of file diff --git a/llvm/test/CodeGen/X86/ctlo.ll b/llvm/test/CodeGen/X86/ctlo.ll index d735ca5d446cd2..7431f94f0fdf24 100644 --- a/llvm/test/CodeGen/X86/ctlo.ll +++ b/llvm/test/CodeGen/X86/ctlo.ll @@ -92,8 +92,8 @@ define i8 
@ctlo_i8_undef(i8 %x) { ; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-CLZ-NEXT: notb %al ; X86-CLZ-NEXT: movzbl %al, %eax +; X86-CLZ-NEXT: shll $24, %eax ; X86-CLZ-NEXT: lzcntl %eax, %eax -; X86-CLZ-NEXT: addl $-24, %eax ; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax ; X86-CLZ-NEXT: retl ; @@ -101,8 +101,8 @@ define i8 @ctlo_i8_undef(i8 %x) { ; X64-CLZ: # %bb.0: ; X64-CLZ-NEXT: notb %dil ; X64-CLZ-NEXT: movzbl %dil, %eax +; X64-CLZ-NEXT: shll $24, %eax ; X64-CLZ-NEXT: lzcntl %eax, %eax -; X64-CLZ-NEXT: addl $-24, %eax ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax ; X64-CLZ-NEXT: retq %tmp1 = xor i8 %x, -1 diff --git a/llvm/test/CodeGen/X86/ctlz.ll b/llvm/test/CodeGen/X86/ctlz.ll index bd3d36903ee94a..87dca62d741687 100644 --- a/llvm/test/CodeGen/X86/ctlz.ll +++ b/llvm/test/CodeGen/X86/ctlz.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86,X86-NOCMOV ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov | FileCheck %s --check-prefixes=X86,X86-CMOV ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 @@ -31,33 +31,31 @@ define i8 @ctlz_i8(i8 %x) { ; ; X86-CLZ-LABEL: ctlz_i8: ; X86-CLZ: # %bb.0: -; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-CLZ-NEXT: shll $24, %eax ; X86-CLZ-NEXT: lzcntl %eax, %eax -; X86-CLZ-NEXT: addl $-24, %eax ; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax ; X86-CLZ-NEXT: retl ; ; X64-CLZ-LABEL: ctlz_i8: ; X64-CLZ: # %bb.0: -; X64-CLZ-NEXT: movzbl %dil, %eax -; X64-CLZ-NEXT: lzcntl %eax, %eax -; X64-CLZ-NEXT: addl $-24, %eax +; X64-CLZ-NEXT: shll $24, %edi +; X64-CLZ-NEXT: lzcntl %edi, %eax ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax ; X64-CLZ-NEXT: retq ; ; X64-FASTLZCNT-LABEL: ctlz_i8: ; 
X64-FASTLZCNT: # %bb.0: -; X64-FASTLZCNT-NEXT: movzbl %dil, %eax -; X64-FASTLZCNT-NEXT: lzcntl %eax, %eax -; X64-FASTLZCNT-NEXT: addl $-24, %eax +; X64-FASTLZCNT-NEXT: shll $24, %edi +; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax ; X64-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax ; X64-FASTLZCNT-NEXT: retq ; ; X86-FASTLZCNT-LABEL: ctlz_i8: ; X86-FASTLZCNT: # %bb.0: -; X86-FASTLZCNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-FASTLZCNT-NEXT: shll $24, %eax ; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax -; X86-FASTLZCNT-NEXT: addl $-24, %eax ; X86-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax ; X86-FASTLZCNT-NEXT: retl %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true ) @@ -664,8 +662,8 @@ define i8 @ctlz_i8_knownbits(i8 %x) { ; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-CLZ-NEXT: orb $64, %al ; X86-CLZ-NEXT: movzbl %al, %eax +; X86-CLZ-NEXT: shll $24, %eax ; X86-CLZ-NEXT: lzcntl %eax, %eax -; X86-CLZ-NEXT: addl $-24, %eax ; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax ; X86-CLZ-NEXT: retl ; @@ -673,8 +671,8 @@ define i8 @ctlz_i8_knownbits(i8 %x) { ; X64-CLZ: # %bb.0: ; X64-CLZ-NEXT: orb $64, %dil ; X64-CLZ-NEXT: movzbl %dil, %eax +; X64-CLZ-NEXT: shll $24, %eax ; X64-CLZ-NEXT: lzcntl %eax, %eax -; X64-CLZ-NEXT: addl $-24, %eax ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax ; X64-CLZ-NEXT: retq ; @@ -682,8 +680,8 @@ define i8 @ctlz_i8_knownbits(i8 %x) { ; X64-FASTLZCNT: # %bb.0: ; X64-FASTLZCNT-NEXT: orb $64, %dil ; X64-FASTLZCNT-NEXT: movzbl %dil, %eax +; X64-FASTLZCNT-NEXT: shll $24, %eax ; X64-FASTLZCNT-NEXT: lzcntl %eax, %eax -; X64-FASTLZCNT-NEXT: addl $-24, %eax ; X64-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax ; X64-FASTLZCNT-NEXT: retq ; @@ -692,8 +690,8 @@ define i8 @ctlz_i8_knownbits(i8 %x) { ; X86-FASTLZCNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-FASTLZCNT-NEXT: orb $64, %al ; X86-FASTLZCNT-NEXT: movzbl %al, %eax +; X86-FASTLZCNT-NEXT: shll $24, %eax ; 
X86-FASTLZCNT-NEXT: lzcntl %eax, %eax -; X86-FASTLZCNT-NEXT: addl $-24, %eax ; X86-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax ; X86-FASTLZCNT-NEXT: retl @@ -927,18 +925,17 @@ define i8 @ctlz_xor7_i8_true(i8 %x) { ; ; X64-FASTLZCNT-LABEL: ctlz_xor7_i8_true: ; X64-FASTLZCNT: # %bb.0: -; X64-FASTLZCNT-NEXT: movzbl %dil, %eax -; X64-FASTLZCNT-NEXT: lzcntl %eax, %eax -; X64-FASTLZCNT-NEXT: addl $-24, %eax +; X64-FASTLZCNT-NEXT: shll $24, %edi +; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax ; X64-FASTLZCNT-NEXT: xorb $7, %al ; X64-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax ; X64-FASTLZCNT-NEXT: retq ; ; X86-FASTLZCNT-LABEL: ctlz_xor7_i8_true: ; X86-FASTLZCNT: # %bb.0: -; X86-FASTLZCNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-FASTLZCNT-NEXT: shll $24, %eax ; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax -; X86-FASTLZCNT-NEXT: addl $-24, %eax ; X86-FASTLZCNT-NEXT: xorb $7, %al ; X86-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax ; X86-FASTLZCNT-NEXT: retl diff --git a/llvm/test/CodeGen/X86/lzcnt.ll b/llvm/test/CodeGen/X86/lzcnt.ll index 68cef3f9363f99..b0004019734168 100644 --- a/llvm/test/CodeGen/X86/lzcnt.ll +++ b/llvm/test/CodeGen/X86/lzcnt.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc < %s -mtriple=i686-- -mattr=+lzcnt | FileCheck %s --check-prefix=X86 ; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -mattr=+lzcnt | FileCheck %s --check-prefix=X32 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+lzcnt | FileCheck %s --check-prefix=X64 @@ -106,25 +106,23 @@ define i64 @t4(i64 %x) nounwind { define i8 @t5(i8 %x) nounwind { ; X86-LABEL: t5: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shll $24, %eax ; X86-NEXT: lzcntl %eax, %eax -; X86-NEXT: addl $-24, %eax ; X86-NEXT: # kill: def $al 
killed $al killed $eax ; X86-NEXT: retl ; ; X32-LABEL: t5: ; X32: # %bb.0: -; X32-NEXT: movzbl %dil, %eax -; X32-NEXT: lzcntl %eax, %eax -; X32-NEXT: addl $-24, %eax +; X32-NEXT: shll $24, %edi +; X32-NEXT: lzcntl %edi, %eax ; X32-NEXT: # kill: def $al killed $al killed $eax ; X32-NEXT: retq ; ; X64-LABEL: t5: ; X64: # %bb.0: -; X64-NEXT: movzbl %dil, %eax -; X64-NEXT: lzcntl %eax, %eax -; X64-NEXT: addl $-24, %eax +; X64-NEXT: shll $24, %edi +; X64-NEXT: lzcntl %edi, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %tmp = tail call i8 @llvm.ctlz.i8( i8 %x, i1 true ) diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll index 9bee9d0de88aea..19774b72051092 100644 --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -119,6 +119,7 @@ ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions ; CHECK-NEXT: Live Range Shrink +; CHECK-NEXT: X86 Windows Fixup Buffer Security Check ; CHECK-NEXT: X86 Fixup SetCC ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: X86 LEA Optimize diff --git a/llvm/test/CodeGen/X86/pr38539.ll b/llvm/test/CodeGen/X86/pr38539.ll index fbc363f77ec42c..e710d3f95e6f46 100644 --- a/llvm/test/CodeGen/X86/pr38539.ll +++ b/llvm/test/CodeGen/X86/pr38539.ll @@ -22,79 +22,84 @@ define void @f() nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $160, %esp -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: subl $176, %esp +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movzbl (%eax), %eax ; X86-NEXT: movzbl (%eax), %ecx ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-NEXT: divb %cl -; X86-NEXT: movl %edx, %eax +; X86-NEXT: movl %edi, %eax ; X86-NEXT: shll $30, %eax ; X86-NEXT: movl %eax, 
%ecx ; X86-NEXT: sarl $30, %ecx ; X86-NEXT: sarl $31, %eax -; X86-NEXT: xorl %eax, %edx ; X86-NEXT: xorl %eax, %edi +; X86-NEXT: xorl %eax, %edx ; X86-NEXT: shrdl $1, %eax, %ecx ; X86-NEXT: xorl %ecx, %esi ; X86-NEXT: subl %ecx, %esi -; X86-NEXT: sbbl %eax, %edi ; X86-NEXT: sbbl %eax, %edx -; X86-NEXT: andl $3, %edx -; X86-NEXT: testl %edi, %edi +; X86-NEXT: sbbl %eax, %edi +; X86-NEXT: movl %edi, %ecx +; X86-NEXT: shldl $30, %edx, %ecx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl $30, %esi, %edx +; X86-NEXT: testl %ecx, %ecx ; X86-NEXT: jne .LBB0_1 ; X86-NEXT: # %bb.2: # %BB_udiv-special-cases -; X86-NEXT: bsrl %esi, %eax +; X86-NEXT: bsrl %edx, %eax ; X86-NEXT: xorl $31, %eax ; X86-NEXT: addl $32, %eax ; X86-NEXT: jmp .LBB0_3 ; X86-NEXT: .LBB0_1: -; X86-NEXT: bsrl %edi, %eax +; X86-NEXT: bsrl %ecx, %eax ; X86-NEXT: xorl $31, %eax ; X86-NEXT: .LBB0_3: # %BB_udiv-special-cases -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: testl %edx, %edx ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shll $30, %esi ; X86-NEXT: jne .LBB0_4 ; X86-NEXT: # %bb.5: # %BB_udiv-special-cases -; X86-NEXT: addl $64, %eax -; X86-NEXT: jmp .LBB0_6 +; X86-NEXT: movl $64, %esi +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: je .LBB0_7 +; X86-NEXT: jmp .LBB0_8 ; X86-NEXT: .LBB0_4: -; X86-NEXT: bsrl %edx, %eax -; X86-NEXT: xorl $31, %eax -; X86-NEXT: addl $32, %eax -; X86-NEXT: .LBB0_6: # %BB_udiv-special-cases -; X86-NEXT: subl $62, %eax -; X86-NEXT: movl $0, %ebx -; X86-NEXT: sbbl %ebx, %ebx -; X86-NEXT: sbbl %ecx, %ecx +; X86-NEXT: bsrl %esi, %esi +; X86-NEXT: xorl $31, %esi +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: jne .LBB0_8 +; X86-NEXT: .LBB0_7: # %BB_udiv-special-cases +; X86-NEXT: addl $64, %esi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: .LBB0_8: # %BB_udiv-special-cases +; X86-NEXT: leal {{[0-9]+}}(%esp), %esi ; X86-NEXT: addl $-66, %eax +; X86-NEXT: movl $0, %ebx ; X86-NEXT: adcl $-1, %ebx -; X86-NEXT: adcl $3, %ecx -; X86-NEXT: 
movl %ecx, %esi +; X86-NEXT: movl $0, %edx +; X86-NEXT: adcl $3, %edx ; X86-NEXT: movb $1, %cl ; X86-NEXT: testb %cl, %cl -; X86-NEXT: jne .LBB0_11 -; X86-NEXT: # %bb.7: # %BB_udiv-special-cases -; X86-NEXT: andl $3, %esi +; X86-NEXT: jne .LBB0_14 +; X86-NEXT: # %bb.9: # %BB_udiv-special-cases +; X86-NEXT: andl $3, %edx ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: xorl $65, %ecx -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %esi, %ecx +; X86-NEXT: orl %edx, %ecx ; X86-NEXT: orl %ebx, %ecx -; X86-NEXT: je .LBB0_11 -; X86-NEXT: # %bb.8: # %udiv-bb1 +; X86-NEXT: je .LBB0_14 +; X86-NEXT: # %bb.10: # %udiv-bb1 ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: addl $1, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl $0, %ebx ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: andl $3, %ebx +; X86-NEXT: adcl $0, %edx +; X86-NEXT: andl $3, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movb $65, %cl ; X86-NEXT: subb %al, %cl ; X86-NEXT: movb %cl, %ch @@ -102,7 +107,7 @@ define void @f() nounwind { ; X86-NEXT: shrb $3, %cl ; X86-NEXT: andb $15, %cl ; X86-NEXT: negb %cl -; X86-NEXT: movsbl %cl, %eax +; X86-NEXT: movsbl %cl, %esi ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) @@ -112,28 +117,27 @@ define void @f() nounwind { ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 120(%esp,%eax), %edi +; X86-NEXT: movl 136(%esp,%esi), %edi ; X86-NEXT: movb %ch, %cl ; X86-NEXT: shll %cl, %edi ; X86-NEXT: notb %cl -; X86-NEXT: movl 112(%esp,%eax), %esi -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 116(%esp,%eax), %edx -; X86-NEXT: movl %edx, %eax -; X86-NEXT: shrl %eax -; 
X86-NEXT: shrl %cl, %eax +; X86-NEXT: movl 128(%esp,%esi), %ebx +; X86-NEXT: movl 132(%esp,%esi), %eax +; X86-NEXT: movl %eax, %esi +; X86-NEXT: shrl %esi +; X86-NEXT: shrl %cl, %esi ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shldl %cl, %esi, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shll %cl, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: shldl %cl, %ebx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shll %cl, %ebx ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %ebx, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: orl %edx, %ecx ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: je .LBB0_11 -; X86-NEXT: # %bb.9: # %udiv-preheader -; X86-NEXT: orl %eax, %edi +; X86-NEXT: je .LBB0_13 +; X86-NEXT: # %bb.11: # %udiv-preheader +; X86-NEXT: andl $3, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: orl %esi, %edi ; X86-NEXT: andl $3, %edi ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) @@ -150,19 +154,19 @@ define void @f() nounwind { ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: shrb $3, %al ; X86-NEXT: andb $15, %al -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: movl 64(%esp,%eax), %edi -; X86-NEXT: movl 68(%esp,%eax), %edx -; X86-NEXT: movl %edx, %esi +; X86-NEXT: movzbl %al, %edx +; X86-NEXT: movl 80(%esp,%edx), %edi +; X86-NEXT: movl 84(%esp,%edx), %eax +; X86-NEXT: movl %eax, %esi ; X86-NEXT: movb %ch, %cl ; X86-NEXT: shrl %cl, %esi ; X86-NEXT: notb %cl -; X86-NEXT: movl 72(%esp,%eax), %ebx +; X86-NEXT: movl 88(%esp,%edx), %ebx ; X86-NEXT: addl %ebx, %ebx ; X86-NEXT: shll %cl, %ebx ; X86-NEXT: orl %esi, %ebx ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shrdl %cl, %edx, %edi +; X86-NEXT: shrdl %cl, %eax, %edi ; X86-NEXT: movl 
%edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -178,51 +182,52 @@ define void @f() nounwind { ; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: xorl %ecx, %ecx ; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB0_10: # %udiv-do-while +; X86-NEXT: .LBB0_12: # %udiv-do-while ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %ebx, %edi ; X86-NEXT: shldl $1, %ebx, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: shldl $1, %ebx, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: shldl $1, %edi, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl %esi, %edx -; X86-NEXT: andl $2, %edx -; X86-NEXT: shrl %edx -; X86-NEXT: leal (%edx,%ebx,2), %ebx +; X86-NEXT: movl %esi, %eax +; X86-NEXT: andl $2, %eax +; X86-NEXT: shrl %eax +; X86-NEXT: leal (%eax,%edi,2), %edi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: shldl $1, %edx, %esi -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: orl %ebx, %esi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: shldl $1, %eax, %edx -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: orl %ebx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: addl %eax, %eax ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: andl $3, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmpl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: sbbl %edi, %edx +; X86-NEXT: cmpl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: sbbl %ebx, %eax ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: sbbl %ecx, %esi ; X86-NEXT: shll $30, %esi -; X86-NEXT: movl %esi, %edx -; X86-NEXT: sarl $30, %edx -; X86-NEXT: sarl $31, %esi -; X86-NEXT: shrdl $1, %esi, %edx -; X86-NEXT: movl %edx, %eax -; X86-NEXT: andl $1, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: movl %esi, %eax +; X86-NEXT: sarl $30, %eax +; X86-NEXT: sarl $31, %esi +; X86-NEXT: shrdl $1, %esi, %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: andl $1, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %esi, %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT: subl %edx, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %esi, %edi -; X86-NEXT: movl %edi, %ebx -; X86-NEXT: sbbl %eax, %ecx +; X86-NEXT: subl %eax, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %esi, %ebx +; X86-NEXT: sbbl %edx, %ecx ; X86-NEXT: andl $3, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: addl $-1, %eax @@ -236,12 +241,13 @@ define void @f() nounwind { ; X86-NEXT: orl %esi, %eax ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: orl %edx, %eax -; X86-NEXT: jne .LBB0_10 -; X86-NEXT: .LBB0_11: # %udiv-end +; X86-NEXT: jne .LBB0_12 +; X86-NEXT: .LBB0_13: # %udiv-loop-exit +; X86-NEXT: leal {{[0-9]+}}(%esp), %esi +; X86-NEXT: .LBB0_14: # 
%udiv-end ; X86-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload ; X86-NEXT: setne (%eax) -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, (%eax) +; X86-NEXT: movl %esi, (%eax) ; X86-NEXT: movb $0, (%eax) ; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi diff --git a/llvm/test/CodeGen/X86/pr97968.ll b/llvm/test/CodeGen/X86/pr97968.ll new file mode 100644 index 00000000000000..c8a0536ac43169 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr97968.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s + +define <2 x i32> @PR97968(<16 x i32> %a0) { +; CHECK-LABEL: PR97968: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = [2,7,2,7] +; CHECK-NEXT: # xmm1 = mem[0,0] +; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %sub0 = shufflevector <16 x i32> %a0, <16 x i32> poison, <4 x i32> + %sub1 = shufflevector <16 x i32> %a0, <16 x i32> poison, <4 x i32> + %elt0 = extractelement <4 x i32> %sub0, i64 2 + %elt7 = extractelement <4 x i32> %sub1, i64 3 + %scl0 = insertelement <2 x i32> undef, i32 %elt0, i32 0 + %scl1 = insertelement <2 x i32> %scl0, i32 %elt7, i32 1 + ret <2 x i32> %scl1 +} diff --git a/llvm/test/CodeGen/X86/stack-protector-msvc.ll b/llvm/test/CodeGen/X86/stack-protector-msvc.ll index 7cb36aa9db6728..d718062d2c485c 100644 --- a/llvm/test/CodeGen/X86/stack-protector-msvc.ll +++ b/llvm/test/CodeGen/X86/stack-protector-msvc.ll @@ -49,10 +49,15 @@ define void @test(ptr %a) nounwind ssp { ; MSVC-X64-NEXT: callq printf ; MSVC-X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; MSVC-X64-NEXT: xorq %rsp, %rcx -; MSVC-X64-NEXT: callq __security_check_cookie +; MSVC-X64-NEXT: cmpq __security_cookie(%rip), %rcx +; MSVC-X64-NEXT: jne .LBB0_2 +; MSVC-X64-NEXT: # %bb.1: ; MSVC-X64-NEXT: addq $64, %rsp ; MSVC-X64-NEXT: popq %rsi ; MSVC-X64-NEXT: 
retq +; MSVC-X64-NEXT: .LBB0_2: +; MSVC-X64-NEXT: callq __security_check_cookie +; MSVC-X64-NEXT: int3 ; ; MSVC-X86-O0-LABEL: test: ; MSVC-X86-O0: # %bb.0: # %entry @@ -155,11 +160,17 @@ define void @test_vla(i32 %n) nounwind ssp { ; MSVC-X64-NEXT: addq $32, %rsp ; MSVC-X64-NEXT: movq -8(%rbp), %rcx ; MSVC-X64-NEXT: xorq %rbp, %rcx -; MSVC-X64-NEXT: subq $32, %rsp -; MSVC-X64-NEXT: callq __security_check_cookie +; MSVC-X64-NEXT: cmpq __security_cookie(%rip), %rcx +; MSVC-X64-NEXT: jne .LBB1_2 +; MSVC-X64-NEXT: # %bb.1: ; MSVC-X64-NEXT: movq %rbp, %rsp ; MSVC-X64-NEXT: popq %rbp ; MSVC-X64-NEXT: retq +; MSVC-X64-NEXT: .LBB1_2: +; MSVC-X64-NEXT: subq $32, %rsp +; MSVC-X64-NEXT: callq __security_check_cookie +; MSVC-X64-NEXT: addq $32, %rsp +; MSVC-X64-NEXT: int3 ; ; MSVC-X86-O0-LABEL: test_vla: ; MSVC-X86-O0: # %bb.0: @@ -277,13 +288,19 @@ define void @test_vla_realign(i32 %n) nounwind ssp { ; MSVC-X64-NEXT: addq $32, %rsp ; MSVC-X64-NEXT: movq 24(%rbx), %rcx ; MSVC-X64-NEXT: xorq %rbp, %rcx -; MSVC-X64-NEXT: subq $32, %rsp -; MSVC-X64-NEXT: callq __security_check_cookie +; MSVC-X64-NEXT: cmpq __security_cookie(%rip), %rcx +; MSVC-X64-NEXT: jne .LBB2_2 +; MSVC-X64-NEXT: # %bb.1: ; MSVC-X64-NEXT: movq %rbp, %rsp ; MSVC-X64-NEXT: popq %rbx ; MSVC-X64-NEXT: popq %rsi ; MSVC-X64-NEXT: popq %rbp ; MSVC-X64-NEXT: retq +; MSVC-X64-NEXT: .LBB2_2: +; MSVC-X64-NEXT: subq $32, %rsp +; MSVC-X64-NEXT: callq __security_check_cookie +; MSVC-X64-NEXT: addq $32, %rsp +; MSVC-X64-NEXT: int3 ; ; MSVC-X86-O0-LABEL: test_vla_realign: ; MSVC-X86-O0: # %bb.0: @@ -360,4 +377,3 @@ define void @test_vla_realign(i32 %n) nounwind ssp { declare ptr @strcpy(ptr, ptr) nounwind declare i32 @printf(ptr, ...) 
nounwind - diff --git a/llvm/test/CodeGen/X86/tailcc-ssp.ll b/llvm/test/CodeGen/X86/tailcc-ssp.ll index 012c8aa5d969c0..81b6c9882fd995 100644 --- a/llvm/test/CodeGen/X86/tailcc-ssp.ll +++ b/llvm/test/CodeGen/X86/tailcc-ssp.ll @@ -15,12 +15,17 @@ define tailcc void @tailcall_frame(ptr %0, i64 %1) sspreq { ; WINDOWS-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; WINDOWS-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; WINDOWS-NEXT: xorq %rsp, %rcx -; WINDOWS-NEXT: callq __security_check_cookie +; WINDOWS-NEXT: cmpq __security_cookie(%rip), %rcx +; WINDOWS-NEXT: jne .LBB0_1 +; WINDOWS-NEXT: # %bb.2: ; WINDOWS-NEXT: xorl %ecx, %ecx ; WINDOWS-NEXT: xorl %edx, %edx ; WINDOWS-NEXT: xorl %r8d, %r8d ; WINDOWS-NEXT: addq $56, %rsp ; WINDOWS-NEXT: jmp h # TAILCALL +; WINDOWS-NEXT: .LBB0_1: +; WINDOWS-NEXT: callq __security_check_cookie +; WINDOWS-NEXT: int3 ; WINDOWS-NEXT: .seh_endproc ; ; LINUX-LABEL: tailcall_frame: @@ -42,6 +47,7 @@ define tailcc void @tailcall_frame(ptr %0, i64 %1) sspreq { ; LINUX-NEXT: .LBB0_2: # %CallStackCheckFailBlk ; LINUX-NEXT: .cfi_def_cfa_offset 32 ; LINUX-NEXT: callq __stack_chk_fail@PLT + tail call tailcc void @h(ptr null, i64 0, ptr null) ret void } @@ -59,12 +65,16 @@ define void @tailcall_unrelated_frame() sspreq { ; WINDOWS-NEXT: callq bar ; WINDOWS-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; WINDOWS-NEXT: xorq %rsp, %rcx -; WINDOWS-NEXT: callq __security_check_cookie -; WINDOWS-NEXT: nop +; WINDOWS-NEXT: cmpq __security_cookie(%rip), %rcx +; WINDOWS-NEXT: jne .LBB1_1 +; WINDOWS-NEXT: # %bb.2: ; WINDOWS-NEXT: addq $40, %rsp ; WINDOWS-NEXT: jmp bar # TAILCALL +; WINDOWS-NEXT: .LBB1_1: +; WINDOWS-NEXT: callq __security_check_cookie +; WINDOWS-NEXT: int3 ; WINDOWS-NEXT: .seh_endproc -; + ; LINUX-LABEL: tailcall_unrelated_frame: ; LINUX: # %bb.0: ; LINUX-NEXT: pushq %rax @@ -82,6 +92,7 @@ define void @tailcall_unrelated_frame() sspreq { ; LINUX-NEXT: .LBB1_2: # %CallStackCheckFailBlk ; LINUX-NEXT: .cfi_def_cfa_offset 16 ; LINUX-NEXT: callq __stack_chk_fail@PLT + call void 
@bar() tail call void @bar() ret void diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/slp-vectorizer/merge-scalars.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/slp-vectorizer/merge-scalars.ll index daa534fcd0c227..0315adb5452b40 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/slp-vectorizer/merge-scalars.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/slp-vectorizer/merge-scalars.ll @@ -24,11 +24,11 @@ ;; Test that dbg.assigns linked to the the scalar stores to quad get linked to ;; the vector store that replaces them. -; CHECK: #dbg_assign(float undef, ![[VAR:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), ![[ID:[0-9]+]], ptr %arrayidx, !DIExpression(), -; CHECK: #dbg_assign(float undef, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 32, 32), ![[ID]], ptr %quad, !DIExpression(DW_OP_plus_uconst, 4), -; CHECK: #dbg_assign(float undef, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 64, 32), ![[ID]], ptr %quad, !DIExpression(DW_OP_plus_uconst, 8), +; CHECK: #dbg_assign(float poison, ![[VAR:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), ![[ID:[0-9]+]], ptr %arrayidx, !DIExpression(), +; CHECK: #dbg_assign(float poison, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 32, 32), ![[ID]], ptr %quad, !DIExpression(DW_OP_plus_uconst, 4), +; CHECK: #dbg_assign(float poison, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 64, 32), ![[ID]], ptr %quad, !DIExpression(DW_OP_plus_uconst, 8), ; CHECK: store <4 x float> {{.*}} !DIAssignID ![[ID]] -; CHECK: #dbg_assign(float undef, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 96, 32), ![[ID]], ptr %quad, !DIExpression(DW_OP_plus_uconst, 12), +; CHECK: #dbg_assign(float poison, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 96, 32), ![[ID]], ptr %quad, !DIExpression(DW_OP_plus_uconst, 12), target triple = "x86_64-unknown-unknown" diff --git a/llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll b/llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll index 
82b65fe33cd600..320361ed3c0925 100644 --- a/llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll +++ b/llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll @@ -2,10 +2,10 @@ ; Check that atomic memory operations receive PC sections metadata. -; CHECK: @__start_sanmd_atomics = extern_weak hidden global ptr -; CHECK: @__stop_sanmd_atomics = extern_weak hidden global ptr -; CHECK: @__start_sanmd_covered = extern_weak hidden global ptr -; CHECK: @__stop_sanmd_covered = extern_weak hidden global ptr +; CHECK: @__start_sanmd_atomics2 = extern_weak hidden global ptr +; CHECK: @__stop_sanmd_atomics2 = extern_weak hidden global ptr +; CHECK: @__start_sanmd_covered2 = extern_weak hidden global ptr +; CHECK: @__stop_sanmd_covered2 = extern_weak hidden global ptr target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -2035,46 +2035,46 @@ entry: ; Check that callbacks are emitted. 
-; CHECK-LABEL: __sanitizer_metadata_atomics.module_ctor +; CHECK-LABEL: __sanitizer_metadata_atomics2.module_ctor ; CHECK-DAG: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr @__sanitizer_metadata_atomics_add, null ; CHECK-NEXT: br i1 [[CMP]], label %callfunc, label %ret ; CHECK-DAG: callfunc: -; CHECK-NEXT: call void @__sanitizer_metadata_atomics_add(i32 2, ptr @__start_sanmd_atomics, ptr @__stop_sanmd_atomics) +; CHECK-NEXT: call void @__sanitizer_metadata_atomics_add(i32 2, ptr @__start_sanmd_atomics2, ptr @__stop_sanmd_atomics2) ; CHECK-NEXT: br label %ret ; CHECK-DAG: ret: ; CHECK-NEXT: ret void -; CHECK-LABEL: __sanitizer_metadata_atomics.module_dtor +; CHECK-LABEL: __sanitizer_metadata_atomics2.module_dtor ; CHECK-DAG: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr @__sanitizer_metadata_atomics_del, null ; CHECK-NEXT: br i1 [[CMP]], label %callfunc, label %ret ; CHECK-DAG: callfunc: -; CHECK-NEXT: call void @__sanitizer_metadata_atomics_del(i32 2, ptr @__start_sanmd_atomics, ptr @__stop_sanmd_atomics) +; CHECK-NEXT: call void @__sanitizer_metadata_atomics_del(i32 2, ptr @__start_sanmd_atomics2, ptr @__stop_sanmd_atomics2) ; CHECK-NEXT: br label %ret ; CHECK-DAG: ret: ; CHECK-NEXT: ret void -; CHECK-LABEL: __sanitizer_metadata_covered.module_ctor +; CHECK-LABEL: __sanitizer_metadata_covered2.module_ctor ; CHECK-DAG: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr @__sanitizer_metadata_covered_add, null ; CHECK-NEXT: br i1 [[CMP]], label %callfunc, label %ret ; CHECK-DAG: callfunc: -; CHECK-NEXT: call void @__sanitizer_metadata_covered_add(i32 2, ptr @__start_sanmd_covered, ptr @__stop_sanmd_covered) +; CHECK-NEXT: call void @__sanitizer_metadata_covered_add(i32 2, ptr @__start_sanmd_covered2, ptr @__stop_sanmd_covered2) ; CHECK-NEXT: br label %ret ; CHECK-DAG: ret: ; CHECK-NEXT: ret void -; CHECK-LABEL: __sanitizer_metadata_covered.module_dtor +; CHECK-LABEL: __sanitizer_metadata_covered2.module_dtor ; CHECK-DAG: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr 
@__sanitizer_metadata_covered_del, null ; CHECK-NEXT: br i1 [[CMP]], label %callfunc, label %ret ; CHECK-DAG: callfunc: -; CHECK-NEXT: call void @__sanitizer_metadata_covered_del(i32 2, ptr @__start_sanmd_covered, ptr @__stop_sanmd_covered) +; CHECK-NEXT: call void @__sanitizer_metadata_covered_del(i32 2, ptr @__start_sanmd_covered2, ptr @__stop_sanmd_covered2) ; CHECK-NEXT: br label %ret ; CHECK-DAG: ret: ; CHECK-NEXT: ret void -; CHECK: !0 = !{!"sanmd_covered!C", !1} +; CHECK: !0 = !{!"sanmd_covered2!C", !1} ; CHECK: !1 = !{i64 1} -; CHECK: !2 = !{!"sanmd_atomics!C"} +; CHECK: !2 = !{!"sanmd_atomics2!C"} diff --git a/llvm/test/Instrumentation/SanitizerBinaryMetadata/ctor.ll b/llvm/test/Instrumentation/SanitizerBinaryMetadata/ctor.ll index 55f92ea3026ec4..bccc90e9478584 100644 --- a/llvm/test/Instrumentation/SanitizerBinaryMetadata/ctor.ll +++ b/llvm/test/Instrumentation/SanitizerBinaryMetadata/ctor.ll @@ -1,16 +1,16 @@ ; RUN: opt < %s -passes='module(sanmd-module)' -sanitizer-metadata-atomics -S | FileCheck %s -; CHECK: $__sanitizer_metadata_atomics.module_ctor = comdat any -; CHECK: $__sanitizer_metadata_atomics.module_dtor = comdat any -; CHECK: $__sanitizer_metadata_covered.module_ctor = comdat any -; CHECK: $__sanitizer_metadata_covered.module_dtor = comdat any +; CHECK: $__sanitizer_metadata_atomics2.module_ctor = comdat any +; CHECK: $__sanitizer_metadata_atomics2.module_dtor = comdat any +; CHECK: $__sanitizer_metadata_covered2.module_ctor = comdat any +; CHECK: $__sanitizer_metadata_covered2.module_dtor = comdat any -; CHECK: @llvm.used = appending global [4 x ptr] [ptr @__sanitizer_metadata_atomics.module_ctor, ptr @__sanitizer_metadata_atomics.module_dtor, ptr @__sanitizer_metadata_covered.module_ctor, ptr @__sanitizer_metadata_covered.module_dtor], section "llvm.metadata" -; CHECK: @llvm.global_ctors = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 2, ptr @__sanitizer_metadata_atomics.module_ctor, ptr 
@__sanitizer_metadata_atomics.module_ctor }, { i32, ptr, ptr } { i32 2, ptr @__sanitizer_metadata_covered.module_ctor, ptr @__sanitizer_metadata_covered.module_ctor }] -; CHECK: @llvm.global_dtors = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 2, ptr @__sanitizer_metadata_atomics.module_dtor, ptr @__sanitizer_metadata_atomics.module_dtor }, { i32, ptr, ptr } { i32 2, ptr @__sanitizer_metadata_covered.module_dtor, ptr @__sanitizer_metadata_covered.module_dtor }] +; CHECK: @llvm.used = appending global [4 x ptr] [ptr @__sanitizer_metadata_atomics2.module_ctor, ptr @__sanitizer_metadata_atomics2.module_dtor, ptr @__sanitizer_metadata_covered2.module_ctor, ptr @__sanitizer_metadata_covered2.module_dtor], section "llvm.metadata" +; CHECK: @llvm.global_ctors = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 2, ptr @__sanitizer_metadata_atomics2.module_ctor, ptr @__sanitizer_metadata_atomics2.module_ctor }, { i32, ptr, ptr } { i32 2, ptr @__sanitizer_metadata_covered2.module_ctor, ptr @__sanitizer_metadata_covered2.module_ctor }] +; CHECK: @llvm.global_dtors = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 2, ptr @__sanitizer_metadata_atomics2.module_dtor, ptr @__sanitizer_metadata_atomics2.module_dtor }, { i32, ptr, ptr } { i32 2, ptr @__sanitizer_metadata_covered2.module_dtor, ptr @__sanitizer_metadata_covered2.module_dtor }] -; CHECK: define hidden void @__sanitizer_metadata_covered.module_ctor() #1 comdat { -; CHECK: define hidden void @__sanitizer_metadata_covered.module_dtor() #1 comdat { +; CHECK: define hidden void @__sanitizer_metadata_covered2.module_ctor() #1 comdat { +; CHECK: define hidden void @__sanitizer_metadata_covered2.module_dtor() #1 comdat { target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/MC/ELF/cfi-label.s b/llvm/test/MC/ELF/cfi-label.s new file mode 100644 index 
00000000000000..e0df9b8a1b253e --- /dev/null +++ b/llvm/test/MC/ELF/cfi-label.s @@ -0,0 +1,61 @@ +# RUN: llvm-mc -triple x86_64 %s | FileCheck %s --check-prefix=ASM +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t +# RUN: llvm-readelf -sX %t | FileCheck %s --check-prefix=SYMTAB +# RUN: llvm-dwarfdump --eh-frame %t | FileCheck %s + +# RUN: not llvm-mc -filetype=obj -triple=x86_64 --defsym ERR=1 %s -o /dev/null 2>&1 | \ +# RUN: FileCheck %s --check-prefix=ERR --implicit-check-not=error: + +# ASM: nop +# ASM-NEXT: .cfi_label cfi1 +# ASM-NEXT: .cfi_escape 0x00 +# ASM: .globl cfi2 +# ASM-NEXT: .cfi_label cfi2 +# ASM: nop +# ASM-NEXT: .cfi_label .Lcfi3 + +# SYMTAB: 000000000000002b 0 NOTYPE LOCAL DEFAULT 3 (.eh_frame) cfi1 +# SYMTAB: 000000000000002d 0 NOTYPE GLOBAL DEFAULT 3 (.eh_frame) cfi2 +# SYMTAB-NOT: {{.}} + +# CHECK: DW_CFA_remember_state: +# CHECK-NEXT: DW_CFA_advance_loc: 1 to 0x1 +# CHECK-NEXT: DW_CFA_nop: +# CHECK-NEXT: DW_CFA_advance_loc: 1 to 0x2 +# CHECK-NEXT: DW_CFA_nop: +# CHECK-NEXT: DW_CFA_nop: +# CHECK-NEXT: DW_CFA_advance_loc: 1 to 0x3 +# CHECK-NEXT: DW_CFA_nop: +# CHECK-NEXT: DW_CFA_nop: +# CHECK-NEXT: DW_CFA_nop: +# CHECK-NEXT: DW_CFA_restore_state: + +.globl foo +foo: +.cfi_startproc +.cfi_remember_state +nop +.cfi_label cfi1 +.cfi_escape 0 +nop +.globl cfi2 +.cfi_label cfi2 +.cfi_escape 0, 0 +nop +.cfi_label .Lcfi3 +.cfi_escape 0, 0, 0 +.cfi_restore_state +ret + +# ERR: [[#@LINE+10]]:1: error: this directive must appear between .cfi_startproc and .cfi_endproc directives +.ifdef ERR +# ERR: [[#@LINE+1]]:12: error: symbol 'foo' is already defined +.cfi_label foo +# ERR: [[#@LINE+1]]:12: error: symbol '.Lcfi3' is already defined +.cfi_label .Lcfi3 +.endif +.cfi_endproc + +.ifdef ERR +.cfi_label after_endproc +.endif diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td index 2825ade2d2134d..7bbde818082ce0 100644 --- 
a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td @@ -85,12 +85,12 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static uint8_t MatchTable0[] = { -// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(95), GIMT_Encode2(195), /*)*//*default:*//*Label 4*/ GIMT_Encode4(464), +// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2([[#LOWER:]]), GIMT_Encode2([[#UPPER:]]), /*)*//*default:*//*Label 4*/ GIMT_Encode4([[#DEFAULT:]]), // CHECK-NEXT: /*TargetOpcode::G_STORE*//*Label 0*/ GIMT_Encode4(410), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), // CHECK-NEXT: /*TargetOpcode::G_SEXT*//*Label 1*/ GIMT_Encode4(428), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), 
GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), // CHECK-NEXT: /*TargetOpcode::G_FNEG*//*Label 2*/ GIMT_Encode4(440), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), // CHECK-NEXT: /*TargetOpcode::G_FABS*//*Label 3*/ GIMT_Encode4(452), -// CHECK-NEXT: // Label 0: @410 +// CHECK-NEXT: // Label 0: @[[#%u, mul(UPPER-LOWER, 4) + 10]] // CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 5*/ GIMT_Encode4(427), // Rule ID 2 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule2Enabled), // CHECK-NEXT: // MIs[0] x @@ -136,8 +136,8 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner0), // CHECK-NEXT: // Label 8: @463 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 4: @464 +// CHECK-NEXT: // Label 4: @[[#%u, DEFAULT]] // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: }; // Size: 465 bytes +// CHECK-NEXT: }; // Size: [[#%u, DEFAULT + 1]] bytes // CHECK-NEXT: return MatchTable0; // CHECK-NEXT: } diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-variadics.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-variadics.td index 43cdd4f5546bb6..86ae031caecb53 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-variadics.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-variadics.td @@ -37,10 +37,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static uint8_t MatchTable0[] = { -// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(70), GIMT_Encode2(74), /*)*//*default:*//*Label 2*/ GIMT_Encode4(84), +// CHECK-NEXT: 
GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2([[#LOWER:]]), GIMT_Encode2([[#UPPER:]]), /*)*//*default:*//*Label 2*/ GIMT_Encode4([[#DEFAULT:]]), // CHECK-NEXT: /*TargetOpcode::G_UNMERGE_VALUES*//*Label 0*/ GIMT_Encode4(26), GIMT_Encode4(0), GIMT_Encode4(0), // CHECK-NEXT: /*TargetOpcode::G_BUILD_VECTOR*//*Label 1*/ GIMT_Encode4(55), -// CHECK-NEXT: // Label 0: @26 +// CHECK-NEXT: // Label 0: @[[#%u, mul(UPPER-LOWER, 4) + 10]] // CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 3*/ GIMT_Encode4(40), // Rule ID 2 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule2Enabled), // CHECK-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/2, @@ -92,8 +92,8 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner0), // CHECK-NEXT: // Label 6: @83 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 2: @84 +// CHECK-NEXT: // Label 2: @[[#%u, DEFAULT]] // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: }; // Size: 85 bytes +// CHECK-NEXT: }; // Size: [[#%u, DEFAULT + 1]] bytes // CHECK-NEXT: return MatchTable0; // CHECK-NEXT: } diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td index 7ff637fa1e0e37..b52849b6bc931d 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td @@ -135,14 +135,14 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // Verify match table. 
// CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static uint8_t MatchTable0[] = { -// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(19), GIMT_Encode2(134), /*)*//*default:*//*Label 6*/ GIMT_Encode4(657), +// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2([[#LOWER:]]), GIMT_Encode2([[#UPPER:]]), /*)*//*default:*//*Label 6*/ GIMT_Encode4([[#DEFAULT:]]), // CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4(470), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), // CHECK-NEXT: /*TargetOpcode::G_AND*//*Label 1*/ GIMT_Encode4(506), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), // CHECK-NEXT: /*TargetOpcode::G_STORE*//*Label 2*/ GIMT_Encode4(553), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), 
GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), // CHECK-NEXT: /*TargetOpcode::G_TRUNC*//*Label 3*/ GIMT_Encode4(587), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), // CHECK-NEXT: /*TargetOpcode::G_SEXT*//*Label 4*/ GIMT_Encode4(610), GIMT_Encode4(0), // CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 5*/ GIMT_Encode4(622), -// CHECK-NEXT: // Label 0: @470 +// CHECK-NEXT: // Label 0: @[[#%u, mul(UPPER-LOWER, 4) + 10]] // CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 7*/ GIMT_Encode4(494), // Rule ID 4 // // CHECK-NEXT: GIM_CheckFeatures, GIMT_Encode2(GIFBS_HasAnswerToEverything), // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule3Enabled), @@ -242,8 +242,8 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_EraseRootFromParent_Done, // CHECK-NEXT: // Label 14: @656 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 6: @657 +// CHECK-NEXT: // Label 6: @[[#%u, DEFAULT]] // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: }; // Size: 658 bytes +// CHECK-NEXT: }; // Size: [[#%u, DEFAULT + 1]] bytes // CHECK-NEXT: return MatchTable0; // CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll index b4427864d4730d..895c89b768acb3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll @@ -96,35 +96,49 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { ; ; NO-VP-LABEL: @interleave( ; NO-VP-NEXT: 
entry: -; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 16 +; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8 +; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; NO-VP: vector.ph: -; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 +; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; NO-VP-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 -; NO-VP-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8 -; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B:%.*]], i64 [[TMP10]], i32 0 -; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i64 [[TMP1]], i32 0 -; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 -; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 -; NO-VP-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP4]], align 4 -; NO-VP-NEXT: [[WIDE_VEC1:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 -; NO-VP-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> -; NO-VP-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> -; NO-VP-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> -; NO-VP-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> -; NO-VP-NEXT: [[TMP6:%.*]] = add nsw <8 x i32> 
[[STRIDED_VEC3]], [[STRIDED_VEC]] -; NO-VP-NEXT: [[TMP7:%.*]] = add nsw <8 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC2]] -; NO-VP-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] -; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]] -; NO-VP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 0 -; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 8 -; NO-VP-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP12]], align 4 -; NO-VP-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP11]], align 4 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; NO-VP-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; NO-VP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 +; NO-VP-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] +; NO-VP-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i32], ptr [[B:%.*]], i64 [[TMP6]], i32 0 +; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i64 [[TMP11]], i32 0 +; NO-VP-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 +; NO-VP-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0 +; NO-VP-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP14]], align 4 +; NO-VP-NEXT: [[WIDE_VEC1:%.*]] = load , ptr [[TMP15]], align 4 +; NO-VP-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) +; NO-VP-NEXT: [[TMP16:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; NO-VP-NEXT: [[TMP17:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 +; NO-VP-NEXT: [[STRIDED_VEC2:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8i32( [[WIDE_VEC1]]) +; NO-VP-NEXT: [[TMP18:%.*]] = extractvalue { , } [[STRIDED_VEC2]], 0 +; NO-VP-NEXT: [[TMP19:%.*]] = extractvalue { , } [[STRIDED_VEC2]], 1 +; NO-VP-NEXT: [[TMP20:%.*]] = add nsw [[TMP17]], [[TMP16]] +; NO-VP-NEXT: 
[[TMP21:%.*]] = add nsw [[TMP19]], [[TMP18]] +; NO-VP-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] +; NO-VP-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] +; NO-VP-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 0 +; NO-VP-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 4 +; NO-VP-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 [[TMP26]] +; NO-VP-NEXT: store [[TMP20]], ptr [[TMP24]], align 4 +; NO-VP-NEXT: store [[TMP21]], ptr [[TMP27]], align 4 +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; NO-VP-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO-VP: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/zero_unroll.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/zero_unroll.ll index 7d42d9ae61b05e..bd0423b6f84144 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/zero_unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/zero_unroll.ll @@ -6,35 +6,13 @@ define i32 @main(i32 %arg, ptr nocapture readnone %arg1) #0 { ; CHECK-SAME: i32 [[ARG:%.*]], ptr nocapture readnone [[ARG1:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = alloca i8, align 1 -; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8 -; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[OFFSET_IDX]], 3 -; CHECK-NEXT: 
[[TMP5:%.*]] = add i8 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = add i8 [[OFFSET_IDX]], 5 -; CHECK-NEXT: [[TMP7:%.*]] = add i8 [[OFFSET_IDX]], 6 -; CHECK-NEXT: [[TMP8:%.*]] = add i8 [[OFFSET_IDX]], 7 -; CHECK-NEXT: store i8 [[TMP8]], ptr [[TMP0]], align 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8 -; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 false, label %[[RET:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 8, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[STOREMERGE_I_I:%.*]] = phi i8 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[TMP12_I_I:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[STOREMERGE_I_I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[TMP12_I_I:%.*]], %[[LOOP]] ] ; CHECK-NEXT: store i8 [[STOREMERGE_I_I]], ptr [[TMP0]], align 2 ; CHECK-NEXT: [[TMP8_I_I:%.*]] = icmp ult i8 [[STOREMERGE_I_I]], 8 ; CHECK-NEXT: [[TMP12_I_I]] = add nuw nsw i8 [[STOREMERGE_I_I]], 1 -; CHECK-NEXT: br i1 [[TMP8_I_I]], label %[[LOOP]], label %[[RET]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8_I_I]], label %[[LOOP]], label %[[RET:.*]] ; CHECK: [[RET]]: ; CHECK-NEXT: ret i32 0 ; @@ -55,9 +33,3 @@ ret: attributes #0 = { "target-cpu"="z13" } -;. -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -;. 
diff --git a/llvm/test/Transforms/LoopVectorize/X86/ephemeral-recipes.ll b/llvm/test/Transforms/LoopVectorize/X86/ephemeral-recipes.ll new file mode 100644 index 00000000000000..5b250c33a45fbd --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/ephemeral-recipes.ll @@ -0,0 +1,453 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p loop-vectorize -S %s | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx" + +define i32 @ephemeral_load_and_compare_iv_used_outside(ptr %start, ptr %end) #0 { +; CHECK-LABEL: define i32 @ephemeral_load_and_compare_iv_used_outside( +; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[END2:%.*]] = ptrtoint ptr [[END]] to i64 +; CHECK-NEXT: [[START1:%.*]] = ptrtoint ptr [[START]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[START1]], [[END2]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 128 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 128 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], -8 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], %[[VECTOR_PH]] ], [ [[PTR_IND:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <32 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], 
<32 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <32 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <32 x i64> +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i32> @llvm.masked.gather.v32i32.v32p0(<32 x ptr> [[TMP4]], i32 4, <32 x i1> , <32 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <32 x i32> @llvm.masked.gather.v32i32.v32p0(<32 x ptr> [[TMP5]], i32 4, <32 x i1> , <32 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call <32 x i32> @llvm.masked.gather.v32i32.v32p0(<32 x ptr> [[TMP6]], i32 4, <32 x i1> , <32 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <32 x i32> @llvm.masked.gather.v32i32.v32p0(<32 x ptr> [[TMP7]], i32 4, <32 x i1> , <32 x i32> poison) +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i32> [[WIDE_MASKED_GATHER3]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i32> [[WIDE_MASKED_GATHER4]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <32 x i32> [[WIDE_MASKED_GATHER5]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <32 x i1> [[TMP8]], i32 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP12]]) +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <32 x i1> [[TMP8]], i32 1 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP13]]) +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <32 x i1> [[TMP8]], i32 2 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP14]]) +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <32 x i1> [[TMP8]], i32 3 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP15]]) +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <32 x i1> [[TMP8]], i32 4 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP16]]) +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <32 x i1> [[TMP8]], i32 5 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP17]]) +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <32 x i1> [[TMP8]], i32 6 +; CHECK-NEXT: call void @llvm.assume(i1 
[[TMP18]]) +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <32 x i1> [[TMP8]], i32 7 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP19]]) +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <32 x i1> [[TMP8]], i32 8 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP20]]) +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <32 x i1> [[TMP8]], i32 9 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP21]]) +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <32 x i1> [[TMP8]], i32 10 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP22]]) +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i1> [[TMP8]], i32 11 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP23]]) +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <32 x i1> [[TMP8]], i32 12 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP24]]) +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <32 x i1> [[TMP8]], i32 13 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP25]]) +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <32 x i1> [[TMP8]], i32 14 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP26]]) +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <32 x i1> [[TMP8]], i32 15 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP27]]) +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <32 x i1> [[TMP8]], i32 16 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP28]]) +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <32 x i1> [[TMP8]], i32 17 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP29]]) +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <32 x i1> [[TMP8]], i32 18 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP30]]) +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <32 x i1> [[TMP8]], i32 19 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP31]]) +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i1> [[TMP8]], i32 20 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP32]]) +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <32 x i1> [[TMP8]], i32 21 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP33]]) +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <32 x i1> [[TMP8]], i32 22 +; CHECK-NEXT: call void @llvm.assume(i1 
[[TMP34]]) +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <32 x i1> [[TMP8]], i32 23 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP35]]) +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <32 x i1> [[TMP8]], i32 24 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP36]]) +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <32 x i1> [[TMP8]], i32 25 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP37]]) +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <32 x i1> [[TMP8]], i32 26 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP38]]) +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <32 x i1> [[TMP8]], i32 27 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP39]]) +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <32 x i1> [[TMP8]], i32 28 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP40]]) +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <32 x i1> [[TMP8]], i32 29 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP41]]) +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <32 x i1> [[TMP8]], i32 30 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP42]]) +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <32 x i1> [[TMP8]], i32 31 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP43]]) +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <32 x i1> [[TMP9]], i32 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP44]]) +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <32 x i1> [[TMP9]], i32 1 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP45]]) +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <32 x i1> [[TMP9]], i32 2 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP46]]) +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <32 x i1> [[TMP9]], i32 3 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP47]]) +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <32 x i1> [[TMP9]], i32 4 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP48]]) +; CHECK-NEXT: [[TMP49:%.*]] = extractelement <32 x i1> [[TMP9]], i32 5 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP49]]) +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <32 x i1> [[TMP9]], i32 6 +; CHECK-NEXT: call void @llvm.assume(i1 
[[TMP50]]) +; CHECK-NEXT: [[TMP51:%.*]] = extractelement <32 x i1> [[TMP9]], i32 7 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP51]]) +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <32 x i1> [[TMP9]], i32 8 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP52]]) +; CHECK-NEXT: [[TMP53:%.*]] = extractelement <32 x i1> [[TMP9]], i32 9 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP53]]) +; CHECK-NEXT: [[TMP54:%.*]] = extractelement <32 x i1> [[TMP9]], i32 10 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP54]]) +; CHECK-NEXT: [[TMP55:%.*]] = extractelement <32 x i1> [[TMP9]], i32 11 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP55]]) +; CHECK-NEXT: [[TMP56:%.*]] = extractelement <32 x i1> [[TMP9]], i32 12 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP56]]) +; CHECK-NEXT: [[TMP57:%.*]] = extractelement <32 x i1> [[TMP9]], i32 13 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP57]]) +; CHECK-NEXT: [[TMP58:%.*]] = extractelement <32 x i1> [[TMP9]], i32 14 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP58]]) +; CHECK-NEXT: [[TMP59:%.*]] = extractelement <32 x i1> [[TMP9]], i32 15 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP59]]) +; CHECK-NEXT: [[TMP60:%.*]] = extractelement <32 x i1> [[TMP9]], i32 16 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP60]]) +; CHECK-NEXT: [[TMP61:%.*]] = extractelement <32 x i1> [[TMP9]], i32 17 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP61]]) +; CHECK-NEXT: [[TMP62:%.*]] = extractelement <32 x i1> [[TMP9]], i32 18 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP62]]) +; CHECK-NEXT: [[TMP63:%.*]] = extractelement <32 x i1> [[TMP9]], i32 19 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP63]]) +; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i1> [[TMP9]], i32 20 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP64]]) +; CHECK-NEXT: [[TMP65:%.*]] = extractelement <32 x i1> [[TMP9]], i32 21 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP65]]) +; CHECK-NEXT: [[TMP66:%.*]] = extractelement <32 x i1> [[TMP9]], i32 22 +; CHECK-NEXT: call void @llvm.assume(i1 
[[TMP66]]) +; CHECK-NEXT: [[TMP67:%.*]] = extractelement <32 x i1> [[TMP9]], i32 23 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP67]]) +; CHECK-NEXT: [[TMP68:%.*]] = extractelement <32 x i1> [[TMP9]], i32 24 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP68]]) +; CHECK-NEXT: [[TMP69:%.*]] = extractelement <32 x i1> [[TMP9]], i32 25 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP69]]) +; CHECK-NEXT: [[TMP70:%.*]] = extractelement <32 x i1> [[TMP9]], i32 26 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP70]]) +; CHECK-NEXT: [[TMP71:%.*]] = extractelement <32 x i1> [[TMP9]], i32 27 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP71]]) +; CHECK-NEXT: [[TMP72:%.*]] = extractelement <32 x i1> [[TMP9]], i32 28 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP72]]) +; CHECK-NEXT: [[TMP73:%.*]] = extractelement <32 x i1> [[TMP9]], i32 29 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP73]]) +; CHECK-NEXT: [[TMP74:%.*]] = extractelement <32 x i1> [[TMP9]], i32 30 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP74]]) +; CHECK-NEXT: [[TMP75:%.*]] = extractelement <32 x i1> [[TMP9]], i32 31 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP75]]) +; CHECK-NEXT: [[TMP76:%.*]] = extractelement <32 x i1> [[TMP10]], i32 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP76]]) +; CHECK-NEXT: [[TMP77:%.*]] = extractelement <32 x i1> [[TMP10]], i32 1 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP77]]) +; CHECK-NEXT: [[TMP78:%.*]] = extractelement <32 x i1> [[TMP10]], i32 2 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP78]]) +; CHECK-NEXT: [[TMP79:%.*]] = extractelement <32 x i1> [[TMP10]], i32 3 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP79]]) +; CHECK-NEXT: [[TMP80:%.*]] = extractelement <32 x i1> [[TMP10]], i32 4 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP80]]) +; CHECK-NEXT: [[TMP81:%.*]] = extractelement <32 x i1> [[TMP10]], i32 5 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP81]]) +; CHECK-NEXT: [[TMP82:%.*]] = extractelement <32 x i1> [[TMP10]], i32 6 +; CHECK-NEXT: call void @llvm.assume(i1 
[[TMP82]]) +; CHECK-NEXT: [[TMP83:%.*]] = extractelement <32 x i1> [[TMP10]], i32 7 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP83]]) +; CHECK-NEXT: [[TMP84:%.*]] = extractelement <32 x i1> [[TMP10]], i32 8 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP84]]) +; CHECK-NEXT: [[TMP85:%.*]] = extractelement <32 x i1> [[TMP10]], i32 9 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP85]]) +; CHECK-NEXT: [[TMP86:%.*]] = extractelement <32 x i1> [[TMP10]], i32 10 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP86]]) +; CHECK-NEXT: [[TMP87:%.*]] = extractelement <32 x i1> [[TMP10]], i32 11 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP87]]) +; CHECK-NEXT: [[TMP88:%.*]] = extractelement <32 x i1> [[TMP10]], i32 12 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP88]]) +; CHECK-NEXT: [[TMP89:%.*]] = extractelement <32 x i1> [[TMP10]], i32 13 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP89]]) +; CHECK-NEXT: [[TMP90:%.*]] = extractelement <32 x i1> [[TMP10]], i32 14 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP90]]) +; CHECK-NEXT: [[TMP91:%.*]] = extractelement <32 x i1> [[TMP10]], i32 15 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP91]]) +; CHECK-NEXT: [[TMP92:%.*]] = extractelement <32 x i1> [[TMP10]], i32 16 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP92]]) +; CHECK-NEXT: [[TMP93:%.*]] = extractelement <32 x i1> [[TMP10]], i32 17 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP93]]) +; CHECK-NEXT: [[TMP94:%.*]] = extractelement <32 x i1> [[TMP10]], i32 18 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP94]]) +; CHECK-NEXT: [[TMP95:%.*]] = extractelement <32 x i1> [[TMP10]], i32 19 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP95]]) +; CHECK-NEXT: [[TMP96:%.*]] = extractelement <32 x i1> [[TMP10]], i32 20 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP96]]) +; CHECK-NEXT: [[TMP97:%.*]] = extractelement <32 x i1> [[TMP10]], i32 21 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP97]]) +; CHECK-NEXT: [[TMP98:%.*]] = extractelement <32 x i1> [[TMP10]], i32 22 +; CHECK-NEXT: call void 
@llvm.assume(i1 [[TMP98]]) +; CHECK-NEXT: [[TMP99:%.*]] = extractelement <32 x i1> [[TMP10]], i32 23 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP99]]) +; CHECK-NEXT: [[TMP100:%.*]] = extractelement <32 x i1> [[TMP10]], i32 24 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP100]]) +; CHECK-NEXT: [[TMP101:%.*]] = extractelement <32 x i1> [[TMP10]], i32 25 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP101]]) +; CHECK-NEXT: [[TMP102:%.*]] = extractelement <32 x i1> [[TMP10]], i32 26 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP102]]) +; CHECK-NEXT: [[TMP103:%.*]] = extractelement <32 x i1> [[TMP10]], i32 27 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP103]]) +; CHECK-NEXT: [[TMP104:%.*]] = extractelement <32 x i1> [[TMP10]], i32 28 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP104]]) +; CHECK-NEXT: [[TMP105:%.*]] = extractelement <32 x i1> [[TMP10]], i32 29 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP105]]) +; CHECK-NEXT: [[TMP106:%.*]] = extractelement <32 x i1> [[TMP10]], i32 30 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP106]]) +; CHECK-NEXT: [[TMP107:%.*]] = extractelement <32 x i1> [[TMP10]], i32 31 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP107]]) +; CHECK-NEXT: [[TMP108:%.*]] = extractelement <32 x i1> [[TMP11]], i32 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP108]]) +; CHECK-NEXT: [[TMP109:%.*]] = extractelement <32 x i1> [[TMP11]], i32 1 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP109]]) +; CHECK-NEXT: [[TMP110:%.*]] = extractelement <32 x i1> [[TMP11]], i32 2 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP110]]) +; CHECK-NEXT: [[TMP111:%.*]] = extractelement <32 x i1> [[TMP11]], i32 3 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP111]]) +; CHECK-NEXT: [[TMP112:%.*]] = extractelement <32 x i1> [[TMP11]], i32 4 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP112]]) +; CHECK-NEXT: [[TMP113:%.*]] = extractelement <32 x i1> [[TMP11]], i32 5 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP113]]) +; CHECK-NEXT: [[TMP114:%.*]] = extractelement <32 x i1> 
[[TMP11]], i32 6 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP114]]) +; CHECK-NEXT: [[TMP115:%.*]] = extractelement <32 x i1> [[TMP11]], i32 7 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP115]]) +; CHECK-NEXT: [[TMP116:%.*]] = extractelement <32 x i1> [[TMP11]], i32 8 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP116]]) +; CHECK-NEXT: [[TMP117:%.*]] = extractelement <32 x i1> [[TMP11]], i32 9 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP117]]) +; CHECK-NEXT: [[TMP118:%.*]] = extractelement <32 x i1> [[TMP11]], i32 10 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP118]]) +; CHECK-NEXT: [[TMP119:%.*]] = extractelement <32 x i1> [[TMP11]], i32 11 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP119]]) +; CHECK-NEXT: [[TMP120:%.*]] = extractelement <32 x i1> [[TMP11]], i32 12 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP120]]) +; CHECK-NEXT: [[TMP121:%.*]] = extractelement <32 x i1> [[TMP11]], i32 13 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP121]]) +; CHECK-NEXT: [[TMP122:%.*]] = extractelement <32 x i1> [[TMP11]], i32 14 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP122]]) +; CHECK-NEXT: [[TMP123:%.*]] = extractelement <32 x i1> [[TMP11]], i32 15 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP123]]) +; CHECK-NEXT: [[TMP124:%.*]] = extractelement <32 x i1> [[TMP11]], i32 16 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP124]]) +; CHECK-NEXT: [[TMP125:%.*]] = extractelement <32 x i1> [[TMP11]], i32 17 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP125]]) +; CHECK-NEXT: [[TMP126:%.*]] = extractelement <32 x i1> [[TMP11]], i32 18 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP126]]) +; CHECK-NEXT: [[TMP127:%.*]] = extractelement <32 x i1> [[TMP11]], i32 19 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP127]]) +; CHECK-NEXT: [[TMP128:%.*]] = extractelement <32 x i1> [[TMP11]], i32 20 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP128]]) +; CHECK-NEXT: [[TMP129:%.*]] = extractelement <32 x i1> [[TMP11]], i32 21 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP129]]) +; 
CHECK-NEXT: [[TMP130:%.*]] = extractelement <32 x i1> [[TMP11]], i32 22 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP130]]) +; CHECK-NEXT: [[TMP131:%.*]] = extractelement <32 x i1> [[TMP11]], i32 23 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP131]]) +; CHECK-NEXT: [[TMP132:%.*]] = extractelement <32 x i1> [[TMP11]], i32 24 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP132]]) +; CHECK-NEXT: [[TMP133:%.*]] = extractelement <32 x i1> [[TMP11]], i32 25 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP133]]) +; CHECK-NEXT: [[TMP134:%.*]] = extractelement <32 x i1> [[TMP11]], i32 26 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP134]]) +; CHECK-NEXT: [[TMP135:%.*]] = extractelement <32 x i1> [[TMP11]], i32 27 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP135]]) +; CHECK-NEXT: [[TMP136:%.*]] = extractelement <32 x i1> [[TMP11]], i32 28 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP136]]) +; CHECK-NEXT: [[TMP137:%.*]] = extractelement <32 x i1> [[TMP11]], i32 29 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP137]]) +; CHECK-NEXT: [[TMP138:%.*]] = extractelement <32 x i1> [[TMP11]], i32 30 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP138]]) +; CHECK-NEXT: [[TMP139:%.*]] = extractelement <32 x i1> [[TMP11]], i32 31 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP139]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128 +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 -1024 +; CHECK-NEXT: [[TMP140:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP140]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: [[CMO:%.*]] = sub i64 [[N_VEC]], 1 +; CHECK-NEXT: [[TMP141:%.*]] = mul i64 [[CMO]], -8 +; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP141]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = getelementptr nusw i8, ptr [[IV]], i64 -8 +; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[IV]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[L1]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[IV]], [[END]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi ptr [ [[IV]], %[[LOOP]] ], [ [[IND_ESCAPE]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[FINAL_LOAD:%.*]] = load i32, ptr [[IV_LCSSA]], align 4 +; CHECK-NEXT: ret i32 [[FINAL_LOAD]] +; +entry: + br label %loop + +loop: + %iv = phi ptr [ %start, %entry ],[ %iv.next, %loop ] + %iv.next = getelementptr nusw i8, ptr %iv, i64 -8 + %l1 = load i32, ptr %iv, align 4 + %cmp = icmp ne i32 %l1, 0 + call void @llvm.assume(i1 %cmp) + %cmp.not = icmp eq ptr %iv, %end + br i1 %cmp.not, label %exit, label %loop + +exit: + %final.load = load i32, ptr %iv, align 4 + ret i32 %final.load +} + +; FIXME: shouldn't be vectorized, as the only vector values generated are +; ephemeral. 
+define i32 @ephemeral_load_and_compare_another_load_used_outside(ptr %start, ptr %end) #0 { +; CHECK-LABEL: define i32 @ephemeral_load_and_compare_another_load_used_outside( +; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[END2:%.*]] = ptrtoint ptr [[END]] to i64 +; CHECK-NEXT: [[START1:%.*]] = ptrtoint ptr [[START]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[START1]], [[END2]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 16 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], -8 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[END]], align 8 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP4]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT9]], <4 x ptr> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[BROADCAST_SPLAT10]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[BROADCAST_SPLAT10]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[BROADCAST_SPLAT10]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER11:%.*]] = call <4 x 
i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[BROADCAST_SPLAT10]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <4 x i32> [[WIDE_MASKED_GATHER5]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <4 x i32> [[WIDE_MASKED_GATHER8]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <4 x i32> [[WIDE_MASKED_GATHER11]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP9]]) +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP10]]) +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP11]]) +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP12]]) +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP13]]) +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP14]]) +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP15]]) +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP16]]) +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP17]]) +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP18]]) +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP19]]) +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP20]]) +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP8]], 
i32 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP21]]) +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP22]]) +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP23]]) +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP24]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT2]] = getelementptr nusw i8, ptr [[IV]], i64 -8 +; CHECK-NEXT: [[L1:%.*]] = load ptr, ptr [[END]], align 8 +; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[L1]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[L2]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[IV]], [[END]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[L1_LCSSA:%.*]] = phi ptr [ [[L1]], %[[LOOP]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[FINAL_LOAD:%.*]] = load i32, ptr [[L1_LCSSA]], align 4 +; CHECK-NEXT: ret i32 [[FINAL_LOAD]] +; +entry: + br label %loop + +loop: + %iv = phi ptr [ %start, %entry ], [ %iv.next2, %loop ] + %iv.next2 = getelementptr nusw i8, ptr %iv, i64 -8 + %l1 = load ptr, ptr %end, align 8 + %l2 = load 
i32, ptr %l1, align 4 + %cmp = icmp ne i32 %l2, 0 + call void @llvm.assume(i1 %cmp) + %cmp.not = icmp eq ptr %iv, %end + br i1 %cmp.not, label %exit, label %loop + +exit: + %final.load = load i32, ptr %l1, align 4 + ret i32 %final.load +} + +declare void @llvm.assume(i1 noundef) + +attributes #0 = { "target-cpu"="skylake-avx512" } +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 968dc09bec4e67..d2e41c6d36a9fb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -622,38 +622,15 @@ define void @wide_iv_trunc_reuse(ptr %dst) { ; CHECK-LABEL: define void @wide_iv_trunc_reuse( ; CHECK-SAME: ptr [[DST:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], 5 -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], 6 -; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], 7 -; 
CHECK-NEXT: store i32 [[TMP7]], ptr [[DST]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ] ; CHECK-NEXT: store i32 [[IV_2]], ptr [[DST]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 0 ; CHECK-NEXT: [[IV_TRUNC]] = trunc i64 [[IV]] to i32 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -701,6 +678,4 @@ attributes #0 = { "min-legal-vector-width"="0" "target-cpu"="skylake-avx512" } ; CHECK: [[LOOP23]] = distinct !{[[LOOP23]], [[META2]], [[META1]]} ; CHECK: [[LOOP24]] = distinct !{[[LOOP24]], [[META1]], [[META2]]} ; CHECK: [[LOOP25]] = distinct !{[[LOOP25]], [[META2]], [[META1]]} -; CHECK: [[LOOP26]] = distinct !{[[LOOP26]], [[META1]], [[META2]]} -; CHECK: [[LOOP27]] = distinct !{[[LOOP27]], [[META2]], [[META1]]} ;. 
diff --git a/llvm/test/Transforms/LoopVectorize/pr32859.ll b/llvm/test/Transforms/LoopVectorize/pr32859.ll index 24e713a7f2cfff..a29a6bd735feb4 100644 --- a/llvm/test/Transforms/LoopVectorize/pr32859.ll +++ b/llvm/test/Transforms/LoopVectorize/pr32859.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=loop-vectorize -S | FileCheck %s +; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -S | FileCheck %s ; Out of the LCSSA form we could have 'phi i32 [ loop-invariant, %for.inc.2.i ]' ; but the IR Verifier requires for PHI one entry for each predecessor of diff --git a/llvm/test/Transforms/LoopVectorize/vplan-incomplete-cases.ll b/llvm/test/Transforms/LoopVectorize/vplan-incomplete-cases.ll index 2007155fe54853..361adc1249b23e 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-incomplete-cases.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-incomplete-cases.ll @@ -6,20 +6,9 @@ define void @vplan_incomplete_cases_tc2(i8 %x, i8 %y) { ; CHECK-LABEL: define void @vplan_incomplete_cases_tc2( ; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ], [ 0, %[[ENTRY]] ] ; 
CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], [[Y]] ; CHECK-NEXT: [[EXTRACT_T:%.*]] = trunc i8 [[AND]] to i1 ; CHECK-NEXT: br i1 [[EXTRACT_T]], label %[[LATCH]], label %[[INDIRECT_LATCH:.*]] @@ -29,7 +18,7 @@ define void @vplan_incomplete_cases_tc2(i8 %x, i8 %y) { ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV]] to i32 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ZEXT]], 1 -; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT:.*]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -60,20 +49,9 @@ define void @vplan_incomplete_cases_tc3(i8 %x, i8 %y) { ; CHECK-LABEL: define void @vplan_incomplete_cases_tc3( ; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 4, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], [[Y]] ; CHECK-NEXT: [[EXTRACT_T:%.*]] = trunc i8 [[AND]] to i1 ; CHECK-NEXT: br i1 [[EXTRACT_T]], label %[[LATCH]], label %[[INDIRECT_LATCH:.*]] @@ -83,7 +61,7 @@ define void @vplan_incomplete_cases_tc3(i8 %x, i8 
%y) { ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV]] to i32 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ZEXT]], 2 -; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT:.*]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -108,11 +86,3 @@ latch: ; preds = %indirect.latch, l exit: ; preds = %latch ret void } -;. -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} -;. diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll index 5232ae76fa8870..9cb2badc25fb20 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll @@ -13,8 +13,8 @@ define void @patatino(i64 %n, i64 %i, ptr %p) !dbg !7 { ; CHECK-NEXT: #dbg_value(i64 [[I:%.*]], [[META19:![0-9]+]], !DIExpression(), [[META24:![0-9]+]]) ; CHECK-NEXT: #dbg_value(ptr [[P:%.*]], [[META20:![0-9]+]], !DIExpression(), [[META25:![0-9]+]]) ; CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[P]], i64 [[N]], i32 0, !dbg [[DBG26:![0-9]+]] -; CHECK-NEXT: #dbg_value(i64 undef, [[META21:![0-9]+]], !DIExpression(), [[META27:![0-9]+]]) -; CHECK-NEXT: #dbg_value(i64 undef, [[META22:![0-9]+]], !DIExpression(), [[META28:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i64 poison, [[META21:![0-9]+]], !DIExpression(), [[META27:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i64 poison, [[META22:![0-9]+]], !DIExpression(), [[META28:![0-9]+]]) ; CHECK-NEXT: [[X5:%.*]] = getelementptr inbounds 
[[STRUCT_S]], ptr [[P]], i64 [[I]], i32 0, !dbg [[DBG29:![0-9]+]] ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[X1]], align 8, !dbg [[DBG26]], !tbaa [[TBAA30:![0-9]+]] ; CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[X5]], align 8, !dbg [[DBG34:![0-9]+]], !tbaa [[TBAA30]] diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll index ff06bdc0e84467..0fcbead65d0d66 100644 --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll @@ -12,11 +12,11 @@ define void @foo() local_unnamed_addr { ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 0), align 4 ; CHECK-NEXT: [[ARRAYIDX372:%.*]] = getelementptr inbounds [4 x [4 x i32]], ptr @dct_luma, i64 0, i64 3, i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 2), align 4 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ADD277]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> undef, [[TMP5]] +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> , i32 [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> , i32 [[ADD277]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP6]], ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[ARRAYIDX372]], align 4 ; CHECK-NEXT: unreachable diff --git 
a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll index 24c5fcb0680865..8c4903dbc92bbe 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll @@ -503,10 +503,10 @@ define void @add_v64i8() { ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]]) +; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]]) -; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SSE-NEXT: ret void ; @@ -522,10 +522,10 @@ define void @add_v64i8() { ; SLM-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]]) +; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SLM-NEXT: [[TMP11:%.*]] 
= load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SLM-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]]) -; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SLM-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SLM-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll index fab022d691c07a..cb8d45b1a21a20 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll @@ -401,10 +401,10 @@ define void @add_v64i8() { ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]]) +; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]]) -; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SSE-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add.ll index dafed43e6e71c1..a7ae2d9e02ff4b 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/arith-add.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add.ll @@ -439,10 +439,10 @@ define void @add_v64i8() { ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP9:%.*]] = add <16 x i8> [[TMP7]], [[TMP8]] +; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP12:%.*]] = add <16 x i8> [[TMP10]], [[TMP11]] -; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SSE-NEXT: ret void ; @@ -458,10 +458,10 @@ define void @add_v64i8() { ; SLM-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP9:%.*]] = add <16 x i8> [[TMP7]], [[TMP8]] +; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SLM-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SLM-NEXT: [[TMP12:%.*]] = add <16 x i8> [[TMP10]], [[TMP11]] -; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SLM-NEXT: store <16 x i8> [[TMP12]], 
ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SLM-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll index e4c76daddb02e4..d4eafdeb50a470 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll @@ -520,10 +520,10 @@ define void @smul_v64i8() { ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]], i32 3) +; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]], i32 3) -; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SSE-NEXT: ret void ; @@ -539,10 +539,10 @@ define void @smul_v64i8() { ; SLM-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]], i32 3) +; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP10:%.*]] = load 
<16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SLM-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SLM-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]], i32 3) -; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SLM-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SLM-NEXT: ret void ; @@ -1323,10 +1323,10 @@ define void @umul_v64i8() { ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]], i32 3) +; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]], i32 3) -; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SSE-NEXT: ret void ; @@ -1342,10 +1342,10 @@ define void @umul_v64i8() { ; SLM-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 
x i8> [[TMP7]], <16 x i8> [[TMP8]], i32 3) +; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SLM-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SLM-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]], i32 3) -; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SLM-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SLM-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll index 9b8480cd0088a3..16977c025e3eaa 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll @@ -480,10 +480,10 @@ define void @fshl_v64i8() { ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP7]], <16 x i8> [[TMP8]]) +; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP10]], <16 x i8> [[TMP11]]) -; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 
0, i64 32), align 1 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 48), align 1 ; SSE-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl.ll index daf28b9a0bb4da..609a9024e5bf7b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl.ll @@ -575,21 +575,21 @@ define void @fshl_v64i8() { ; SSE-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1 ; SSE-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr @c8, align 1 ; SSE-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) +; SSE-NEXT: store <16 x i8> [[TMP4]], ptr @d8, align 1 ; SSE-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1 ; SSE-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1 ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1 ; SSE-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> [[TMP5]], <16 x i8> [[TMP6]], <16 x i8> [[TMP7]]) +; SSE-NEXT: store <16 x i8> [[TMP8]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 16), align 1 ; SSE-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 -; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 -; SSE-NEXT: [[TMP12:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 -; SSE-NEXT: [[TMP13:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 -; SSE-NEXT: [[TMP14:%.*]] = load <16 x i8>, ptr 
getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 -; SSE-NEXT: [[TMP15:%.*]] = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> [[TMP9]], <16 x i8> [[TMP10]], <16 x i8> [[TMP13]]) -; SSE-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> [[TMP11]], <16 x i8> [[TMP12]], <16 x i8> [[TMP14]]) -; SSE-NEXT: store <16 x i8> [[TMP4]], ptr @d8, align 1 -; SSE-NEXT: store <16 x i8> [[TMP8]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 16), align 1 -; SSE-NEXT: store <16 x i8> [[TMP15]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 32), align 1 +; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 +; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> [[TMP9]], <16 x i8> [[TMP10]], <16 x i8> [[TMP11]]) +; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 32), align 1 +; SSE-NEXT: [[TMP13:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 +; SSE-NEXT: [[TMP14:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 +; SSE-NEXT: [[TMP15:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 +; SSE-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> [[TMP13]], <16 x i8> [[TMP14]], <16 x i8> [[TMP15]]) ; SSE-NEXT: store <16 x i8> [[TMP16]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 48), align 1 ; SSE-NEXT: ret void ; @@ -598,11 +598,11 @@ define void @fshl_v64i8() { ; AVX-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1 ; AVX-NEXT: [[TMP3:%.*]] = load <32 x i8>, ptr @c8, align 1 ; AVX-NEXT: [[TMP4:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[TMP1]], <32 x i8> [[TMP2]], <32 x i8> [[TMP3]]) +; AVX-NEXT: store <32 x i8> [[TMP4]], ptr @d8, align 1 ; AVX-NEXT: [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, 
i64 32), align 1 ; AVX-NEXT: [[TMP6:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; AVX-NEXT: [[TMP7:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; AVX-NEXT: [[TMP8:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[TMP5]], <32 x i8> [[TMP6]], <32 x i8> [[TMP7]]) -; AVX-NEXT: store <32 x i8> [[TMP4]], ptr @d8, align 1 ; AVX-NEXT: store <32 x i8> [[TMP8]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 32), align 1 ; AVX-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll index f3e73d0e6840e0..090a9daa6a1136 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll @@ -480,10 +480,10 @@ define void @fshr_v64i8() { ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP7]], <16 x i8> [[TMP8]]) +; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP10]], <16 x i8> [[TMP11]]) -; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 32), align 1 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 48), align 1 ; SSE-NEXT: ret void ; diff --git 
a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr.ll index fb7532768c4b3f..3dc7d164f5bc94 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr.ll @@ -575,21 +575,21 @@ define void @fshr_v64i8() { ; SSE-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1 ; SSE-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr @c8, align 1 ; SSE-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) +; SSE-NEXT: store <16 x i8> [[TMP4]], ptr @d8, align 1 ; SSE-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1 ; SSE-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1 ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1 ; SSE-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> [[TMP5]], <16 x i8> [[TMP6]], <16 x i8> [[TMP7]]) +; SSE-NEXT: store <16 x i8> [[TMP8]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 16), align 1 ; SSE-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 -; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 -; SSE-NEXT: [[TMP12:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 -; SSE-NEXT: [[TMP13:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 -; SSE-NEXT: [[TMP14:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 -; SSE-NEXT: [[TMP15:%.*]] = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> [[TMP9]], <16 x i8> [[TMP10]], <16 x 
i8> [[TMP13]]) -; SSE-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> [[TMP11]], <16 x i8> [[TMP12]], <16 x i8> [[TMP14]]) -; SSE-NEXT: store <16 x i8> [[TMP4]], ptr @d8, align 1 -; SSE-NEXT: store <16 x i8> [[TMP8]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 16), align 1 -; SSE-NEXT: store <16 x i8> [[TMP15]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 32), align 1 +; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 +; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> [[TMP9]], <16 x i8> [[TMP10]], <16 x i8> [[TMP11]]) +; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 32), align 1 +; SSE-NEXT: [[TMP13:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 +; SSE-NEXT: [[TMP14:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 +; SSE-NEXT: [[TMP15:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 +; SSE-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> [[TMP13]], <16 x i8> [[TMP14]], <16 x i8> [[TMP15]]) ; SSE-NEXT: store <16 x i8> [[TMP16]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 48), align 1 ; SSE-NEXT: ret void ; @@ -598,11 +598,11 @@ define void @fshr_v64i8() { ; AVX-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1 ; AVX-NEXT: [[TMP3:%.*]] = load <32 x i8>, ptr @c8, align 1 ; AVX-NEXT: [[TMP4:%.*]] = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> [[TMP1]], <32 x i8> [[TMP2]], <32 x i8> [[TMP3]]) +; AVX-NEXT: store <32 x i8> [[TMP4]], ptr @d8, align 1 ; AVX-NEXT: [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; AVX-NEXT: [[TMP6:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; AVX-NEXT: [[TMP7:%.*]] = load <32 x i8>, 
ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; AVX-NEXT: [[TMP8:%.*]] = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> [[TMP5]], <32 x i8> [[TMP6]], <32 x i8> [[TMP7]]) -; AVX-NEXT: store <32 x i8> [[TMP4]], ptr @d8, align 1 ; AVX-NEXT: store <32 x i8> [[TMP8]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 32), align 1 ; AVX-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll index 94976a8cdee252..51cf32242bfdfe 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll @@ -528,10 +528,10 @@ define void @mul_v64i8() { ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP9:%.*]] = mul <16 x i8> [[TMP7]], [[TMP8]] +; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP12:%.*]] = mul <16 x i8> [[TMP10]], [[TMP11]] -; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SSE-NEXT: ret void ; @@ -547,10 +547,10 @@ define void @mul_v64i8() { ; SLM-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP9:%.*]] = mul <16 x i8> [[TMP7]], [[TMP8]] +; 
SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SLM-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SLM-NEXT: [[TMP12:%.*]] = mul <16 x i8> [[TMP10]], [[TMP11]] -; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SLM-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SLM-NEXT: ret void ; @@ -566,10 +566,10 @@ define void @mul_v64i8() { ; AVX128-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; AVX128-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; AVX128-NEXT: [[TMP9:%.*]] = mul <16 x i8> [[TMP7]], [[TMP8]] +; AVX128-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; AVX128-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; AVX128-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; AVX128-NEXT: [[TMP12:%.*]] = mul <16 x i8> [[TMP10]], [[TMP11]] -; AVX128-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; AVX128-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; AVX128-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-smax.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-smax.ll index c63b672f4187cd..dd76992c2570b9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-smax.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-smax.ll @@ -385,10 +385,10 @@ define void @smax_v64i8() 
{ ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.smax.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]]) +; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.smax.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]]) -; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SSE-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-smin.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-smin.ll index 826f97f2a2d895..678477fa1e3977 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-smin.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-smin.ll @@ -385,10 +385,10 @@ define void @smin_v64i8() { ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.smin.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]]) +; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr 
@b8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.smin.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]]) -; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SSE-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll index afaab8b8ca642b..65e2a011cc9a14 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll @@ -503,10 +503,10 @@ define void @sub_v64i8() { ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]]) +; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]]) -; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SSE-NEXT: ret void ; @@ -522,10 +522,10 @@ define void @sub_v64i8() { ; SLM-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], 
ptr @b8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]]) +; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SLM-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SLM-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]]) -; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SLM-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SLM-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll index 3510863c889301..18df499c6646ec 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll @@ -401,10 +401,10 @@ define void @sub_v64i8() { ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]]) +; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]]) -; SSE-NEXT: store <16 x i8> [[TMP9]], 
ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SSE-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub.ll index be54c1e04ca39a..9d34edbb506c06 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub.ll @@ -439,10 +439,10 @@ define void @sub_v64i8() { ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP9:%.*]] = sub <16 x i8> [[TMP7]], [[TMP8]] +; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP12:%.*]] = sub <16 x i8> [[TMP10]], [[TMP11]] -; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SSE-NEXT: ret void ; @@ -458,10 +458,10 @@ define void @sub_v64i8() { ; SLM-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP9:%.*]] = sub <16 x i8> [[TMP7]], [[TMP8]] +; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SLM-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds 
([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SLM-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SLM-NEXT: [[TMP12:%.*]] = sub <16 x i8> [[TMP10]], [[TMP11]] -; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SLM-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SLM-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-umax.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-umax.ll index 3a187930055f0e..a3f2b97a08a6e9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-umax.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-umax.ll @@ -385,10 +385,10 @@ define void @umax_v64i8() { ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.umax.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]]) +; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.umax.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]]) -; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SSE-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-umin.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-umin.ll index 15119a96280673..0c7688345ac481 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/arith-umin.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-umin.ll @@ -385,10 +385,10 @@ define void @umin_v64i8() { ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]]) +; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]]) -; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SSE-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll index 24b95c4e6ff2f8..4e6ed4bce65889 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll @@ -9,11 +9,10 @@ define void @foo(double %i) { ; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x double> zeroinitializer, [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[I]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> zeroinitializer, [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x 
double> poison, <8 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP9]], <8 x double> , <8 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x double> [[TMP6]], double [[TMP5]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[TMP7]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP22]], <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP5]], <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> , <8 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = fmul <8 x double> , [[TMP8]] ; CHECK-NEXT: [[TMP13:%.*]] = fadd <8 x double> zeroinitializer, [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = fadd <8 x double> [[TMP13]], zeroinitializer @@ -27,7 +26,6 @@ define void @foo(double %i) { ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[TMP18]], i32 1 ; CHECK-NEXT: [[I118:%.*]] = fadd double [[TMP19]], [[TMP20]] ; CHECK-NEXT: [[TMP21:%.*]] = fmul <4 x double> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x double> , <4 x double> [[TMP22]], <4 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = fadd <4 x double> [[TMP21]], [[TMP23]] ; CHECK-NEXT: [[TMP25:%.*]] = fadd <4 x double> [[TMP24]], zeroinitializer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractlements-gathered-first-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractlements-gathered-first-node.ll index 57fa83b1ccdd69..d5f2cf7fc28c46 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractlements-gathered-first-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractlements-gathered-first-node.ll @@ -6,7 +6,6 @@ define void @test() { ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i32> 
zeroinitializer, i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> zeroinitializer, i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> , i32 [[TMP1]], i32 1 ; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i32 [[TMP0]], [[TMP1]] ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll index e3dc67558af028..f036801865048f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -810,14 +810,9 @@ define float @extra_args_same_several_times(ptr nocapture readonly %x, i32 %a, i ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float ; THRESHOLD-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4 ; THRESHOLD-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP0]]) -; THRESHOLD-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 -; THRESHOLD-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[CONV]], i32 1 -; THRESHOLD-NEXT: [[TMP4:%.*]] = fadd fast <2 x float> [[TMP3]], -; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast <2 x float> [[TMP3]], -; THRESHOLD-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> -; THRESHOLD-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0 -; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1 -; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[TMP7]], [[TMP8]] +; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP1]], 1.300000e+01 +; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast float [[CONV]], 2.000000e+00 +; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP2]] ; THRESHOLD-NEXT: ret float [[OP_RDX1]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reordering-single-phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/reordering-single-phi.ll index 
156ab54dbf2372..d88135df5c96a2 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reordering-single-phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reordering-single-phi.ll @@ -12,27 +12,18 @@ define void @test() { ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2 -; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX11]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4 ; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX31]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP6]], i32 3 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[TMP12]], <4 x float> [[TMP9]], <4 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x float> [[TMP13]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = 
shufflevector <4 x float> [[TMP14]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <4 x float> [[TMP11]], [[TMP14]] ; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[ARRAYIDX6]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 5 ; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 [[INDVARS_IV_NEXT]] ; CHECK-NEXT: [[TMP16]] = load float, ptr [[ARRAYIDX41]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP14]], i32 3 ; CHECK-NEXT: [[MUL45:%.*]] = fmul fast float [[TMP16]], [[TMP6]] ; CHECK-NEXT: store float [[MUL45]], ptr [[ARRAYIDX31]], align 4 ; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[INDVARS_IV]], 31990 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shift-ashr.ll b/llvm/test/Transforms/SLPVectorizer/X86/shift-ashr.ll index 51798deae694a3..88aafb2bf148bc 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shift-ashr.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shift-ashr.ll @@ -464,10 +464,10 @@ define void @ashr_v64i8() { ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP9:%.*]] = ashr <16 x i8> [[TMP7]], [[TMP8]] +; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP12:%.*]] = ashr <16 x i8> [[TMP10]], [[TMP11]] -; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: store <16 x i8> [[TMP12]], 
ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SSE-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shift-lshr.ll b/llvm/test/Transforms/SLPVectorizer/X86/shift-lshr.ll index 7583561bbecf90..96977cd4fb7d75 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shift-lshr.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shift-lshr.ll @@ -413,10 +413,10 @@ define void @lshr_v64i8() { ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP9:%.*]] = lshr <16 x i8> [[TMP7]], [[TMP8]] +; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP12:%.*]] = lshr <16 x i8> [[TMP10]], [[TMP11]] -; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SSE-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shift-shl.ll b/llvm/test/Transforms/SLPVectorizer/X86/shift-shl.ll index 5ec327c131fb78..789316ab33c434 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shift-shl.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shift-shl.ll @@ -461,10 +461,10 @@ define void @shl_v64i8() { ; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP9:%.*]] = shl <16 x i8> [[TMP7]], [[TMP8]] +; 
SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 ; SSE-NEXT: [[TMP12:%.*]] = shl <16 x i8> [[TMP10]], [[TMP11]] -; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 ; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 ; SSE-NEXT: ret void ; diff --git a/llvm/test/Transforms/SpeculativeExecution/dropping-debugloc-hoist.ll b/llvm/test/Transforms/SpeculativeExecution/dropping-debugloc-hoist.ll new file mode 100644 index 00000000000000..6355219699f6bb --- /dev/null +++ b/llvm/test/Transforms/SpeculativeExecution/dropping-debugloc-hoist.ll @@ -0,0 +1,36 @@ +; RUN: opt -S -passes=speculative-execution %s | FileCheck %s + +; Check that SpeculativeExecution's considerHoistingFromTo() drops +; the debug location of the hoisted instructions in a certain branch. 
+ +define void @ifThen() !dbg !5 { +; CHECK-LABEL: define void @ifThen( +; CHECK-SAME: ) !dbg [[DBG5:![0-9]+]] { +; CHECK-NEXT: [[X:%.*]] = add i32 2, 3{{$}} +; + br i1 true, label %a, label %b, !dbg !8 + +a: ; preds = %0 + %x = add i32 2, 3, !dbg !9 + br label %b, !dbg !10 + +b: ; preds = %a, %0 + ret void, !dbg !11 +} + +!llvm.dbg.cu = !{!0} +!llvm.debugify = !{!2, !3} +!llvm.module.flags = !{!4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) +!1 = !DIFile(filename: "main.ll", directory: "/") +!2 = !{i32 4} +!3 = !{i32 0} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = distinct !DISubprogram(name: "ifThen", linkageName: "ifThen", scope: null, file: !1, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!6 = !DISubroutineType(types: !7) +!7 = !{} +!8 = !DILocation(line: 1, column: 1, scope: !5) +!9 = !DILocation(line: 2, column: 1, scope: !5) +!10 = !DILocation(line: 3, column: 1, scope: !5) +!11 = !DILocation(line: 4, column: 1, scope: !5) diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll index 3a5d2095e2b93b..bc28259e6939ba 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll @@ -224,14 +224,14 @@ define <16 x i16> @concat_bitcast_v4i32_v16i16(<4 x i32> %a0, <4 x i32> %a1) { ret <16 x i16> %r } -; negative - multiuse +; multiuse - ensure cost of any duplicated casts are worth it define <8 x i16> @concat_trunc_v4i32_v8i16_multiuse(<4 x i32> %a0, <4 x i32> %a1, ptr %a2) { ; CHECK-LABEL: define <8 x i16> @concat_trunc_v4i32_v8i16_multiuse( ; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], ptr [[A2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[X0:%.*]] = trunc <4 x i32> [[A0]] to <4 x i16> -; CHECK-NEXT: [[X1:%.*]] = trunc <4 x i32> [[A1]] to <4 x i16> -; 
CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[X0]], <4 x i16> [[X1]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0]], <4 x i32> [[A1]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = trunc <8 x i32> [[TMP1]] to <8 x i16> ; CHECK-NEXT: store <4 x i16> [[X0]], ptr [[A2]], align 8 ; CHECK-NEXT: ret <8 x i16> [[R]] ; @@ -242,6 +242,24 @@ define <8 x i16> @concat_trunc_v4i32_v8i16_multiuse(<4 x i32> %a0, <4 x i32> %a1 ret <8 x i16> %r } +; negative - multiuse - ensure cost of any duplicated casts are worth it + +define <16 x i8> @concat_trunc_v8i64_v16i8_multiuse(<8 x i64> %a0, <8 x i64> %a1, ptr %a2) { +; CHECK-LABEL: define <16 x i8> @concat_trunc_v8i64_v16i8_multiuse( +; CHECK-SAME: <8 x i64> [[A0:%.*]], <8 x i64> [[A1:%.*]], ptr [[A2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[X0:%.*]] = trunc <8 x i64> [[A0]] to <8 x i8> +; CHECK-NEXT: [[X1:%.*]] = trunc <8 x i64> [[A1]] to <8 x i8> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i8> [[X0]], <8 x i8> [[X1]], <16 x i32> +; CHECK-NEXT: store <8 x i8> [[X0]], ptr [[A2]], align 8 +; CHECK-NEXT: ret <16 x i8> [[R]] +; + %x0 = trunc <8 x i64> %a0 to <8 x i8> + %x1 = trunc <8 x i64> %a1 to <8 x i8> + %r = shufflevector <8 x i8> %x0, <8 x i8> %x1, <16 x i32> + store <8 x i8> %x0, ptr %a2 + ret <16 x i8> %r +} + ; negative - bitcasts (unscalable higher element count) define <16 x i16> @revpair_bitcast_v4i32_v16i16(<4 x i32> %a0, <4 x i32> %a1) { diff --git a/llvm/test/tools/llvm-objcopy/ELF/change-section-lma.test b/llvm/test/tools/llvm-objcopy/ELF/change-section-lma.test new file mode 100644 index 00000000000000..c1cd1eb46d9496 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/ELF/change-section-lma.test @@ -0,0 +1,79 @@ +# RUN: yaml2obj %s -o %t +# RUN: llvm-objcopy --change-section-lma *+0x20 %t %t2 +# RUN: llvm-readelf --program-headers %t2 | FileCheck %s --check-prefix=CHECK-PLUS-PROGRAMS +# RUN: llvm-readelf --section-headers %t2 | FileCheck %s --check-prefix=CHECK-PLUS-SECTIONS +# RUN: llvm-objcopy 
--change-section-lma *-0x30 %t %t3 +# RUN: llvm-readelf --program-headers %t3 | FileCheck %s --check-prefix=CHECK-MINUS-PROGRAMS +# RUN: llvm-readelf --section-headers %t3 | FileCheck %s --check-prefix=CHECK-MINUS-SECTIONS +# RUN: not llvm-objcopy --change-section-lma .text3=0x5000 %t 2>&1 | FileCheck %s --check-prefix=ERR-SET-ADDRESS +# RUN: not llvm-objcopy --change-section-lma .text3+0x30 %t 2>&1 | FileCheck %s --check-prefix=ERR-SPECIFIC-SEC +# RUN: not llvm-objcopy --change-section-lma *+0c50 %t 2>&1 | FileCheck %s --check-prefix=ERR-INVALID-VAL +# RUN: not llvm-objcopy --change-section-lma 0 %t 2>&1 | FileCheck %s --check-prefix=ERR-MISSING-OFFSET +# RUN: not llvm-objcopy --change-section-lma *-0x2000 %t 2>&1 | FileCheck %s --check-prefix=ERR-UNDERFLOW +# RUN: not llvm-objcopy --change-section-lma *+0x100000000 %t 2>&1 | FileCheck %s --check-prefix=ERR-OVERFLOW + +# CHECK-PLUS-PROGRAMS: Type Offset VirtAddr PhysAddr FileSiz MemSiz +# CHECK-PLUS-PROGRAMS: PHDR 0x000002 0x0000000000001102 0x0000000000001122 0x000038 0x000000 +# CHECK-PLUS-PROGRAMS: LOAD 0x000000 0x0000000000001100 0x0000000000001120 0x000258 0x000258 +# CHECK-PLUS-PROGRAMS: LOAD 0x000258 0xffffffff00005100 0xffffffff00006120 0x000100 0x000100 +# CHECK-PLUS-PROGRAMS: NOTE 0x000358 0x0000000000001200 0x0000000000001220 0x000010 0x000000 +# CHECK-PLUS-PROGRAMS: NOTE 0x000368 0x0000000000000000 0x0000000000000000 0x000000 0x000000 + +# CHECK-MINUS-PROGRAMS: PHDR 0x000002 0x0000000000001102 0x00000000000010d2 0x000038 0x000000 +# CHECK-MINUS-PROGRAMS: LOAD 0x000000 0x0000000000001100 0x00000000000010d0 0x000258 0x000258 +# CHECK-MINUS-PROGRAMS: LOAD 0x000258 0xffffffff00005100 0xffffffff000060d0 0x000100 0x000100 +# CHECK-MINUS-PROGRAMS: NOTE 0x000358 0x0000000000001200 0x00000000000011d0 0x000010 0x000000 +# CHECK-MINUS-PROGRAMS: NOTE 0x000368 0x0000000000000000 0x0000000000000000 0x000000 0x000000 + +# CHECK-PLUS-SECTIONS: [Nr] Name Type Address Off Size ES Flg Lk Inf Al +# CHECK-PLUS-SECTIONS: 
.text1 +# CHECK-PLUS-SECTIONS-SAME: 0000000000000000 +# CHECK-PLUS-SECTIONS: .text2 +# CHECK-PLUS-SECTIONS-SAME: 0000000000000000 + +# CHECK-MINUS-SECTIONS: .text1 +# CHECK-MINUS-SECTIONS-SAME: 0000000000000000 +# CHECK-MINUS-SECTIONS: .text2 +# CHECK-MINUS-SECTIONS-SAME: 0000000000000000 + +# ERR-SET-ADDRESS: error: bad format for --change-section-lma: changing LMA to a specific value is not supported. Use *+val or *-val instead +# ERR-SPECIFIC-SEC: error: bad format for --change-section-lma: changing a specific section LMA is not supported. Use *+val or *-val instead +# ERR-INVALID-VAL: error: bad format for --change-section-lma: value after *+ is 0c50 when it should be an integer +# ERR-MISSING-OFFSET: error: bad format for --change-section-lma: missing LMA offset +# ERR-UNDERFLOW: : address 0x1102 cannot be decreased by 0x2000. The result would underflow +# ERR-OVERFLOW: address 0xffffffff00006100 cannot be increased by 0x100000000. The result would overflow + +!ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .text1 + Type: SHT_PROGBITS + Size: 0x100 + - Name: .text2 + Type: SHT_PROGBITS + Size: 0x100 +ProgramHeaders: + - Type: PT_PHDR + FileSize: 0x38 + Offset: 0x2 + VAddr: 0x1102 + - Type: PT_LOAD + Offset: 0x0 + VAddr: 0x1100 + FirstSec: .text1 + LastSec: .text1 + - Type: PT_LOAD + VAddr: 0xFFFFFFFF00005100 + PAddr: 0xFFFFFFFF00006100 + FirstSec: .text2 + LastSec: .text2 + - Type: PT_NOTE + FileSize: 0x10 + VAddr: 0x1200 + Offset: 0x358 + - Type: PT_NOTE + FileSize: 0x0 + Offset: 0x368 diff --git a/llvm/test/tools/llvm-objcopy/ELF/crel.test b/llvm/test/tools/llvm-objcopy/ELF/crel.test new file mode 100644 index 00000000000000..daf4567d0c8a2b --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/ELF/crel.test @@ -0,0 +1,140 @@ +# RUN: yaml2obj %s -o %t +# RUN: llvm-objcopy --remove-section=.foo --strip-symbol=unused %t %t.out +# RUN: llvm-readelf -Sr %t.out | FileCheck %s + +# CHECK: [Nr] Name Type Address Off Size ES 
Flg Lk Inf Al +# CHECK-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0 0 0 +# CHECK-NEXT: [ 1] .text PROGBITS 0000000000000000 {{.*}} 000008 00 A 0 0 0 +# CHECK-NEXT: [ 2] .crel.text CREL 0000000000000000 {{.*}} 000022 00 5 1 0 +# CHECK-NEXT: [ 3] nonalloc PROGBITS 0000000000000000 {{.*}} 000030 00 0 0 0 +# CHECK-NEXT: [ 4] .crelnonalloc CREL 0000000000000000 {{.*}} 00000b 00 5 3 0 + +# CHECK: Relocation section '.crel.text' at offset {{.*}} contains 4 entries: +# CHECK-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +# CHECK-NEXT: 0000000000000001 {{.*}} R_X86_64_32 0000000000000000 g1 + 1 +# CHECK-NEXT: 0000000000000002 {{.*}} R_X86_64_64 0000000000000000 l1 + 2 +# CHECK-NEXT: 0000000000000000 {{.*}} R_X86_64_32S 0000000000000000 g1 - 1 +# CHECK-NEXT: 0000000000000004 {{.*}} R_X86_64_32S 0000000000000000 .text - 8000000000000000 +# CHECK-EMPTY: +# CHECK-NEXT: Relocation section '.crelnonalloc' at offset {{.*}} contains 3 entries: +# CHECK-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +# CHECK-NEXT: 0000000000000010 {{.*}} R_X86_64_64 0000000000000000 g1 + 1 +# CHECK-NEXT: 0000000000000020 {{.*}} R_X86_64_64 0000000000000000 g2 + 2 +# CHECK-NEXT: 0000000000000030 {{.*}} R_X86_64_64 0 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 + +Sections: +- Name: .foo + Type: SHT_PROGBITS + Flags: [SHF_ALLOC] +- Name: .text + Type: SHT_PROGBITS + Content: "0000000000000000" + Flags: [SHF_ALLOC] +- Name: .crel.text + Type: SHT_CREL + Info: .text + Link: .symtab + Relocations: + - Offset: 0x1 + Symbol: g1 + Type: R_X86_64_32 + Addend: 1 + - Offset: 0x2 + Symbol: l1 + Type: R_X86_64_64 + Addend: 2 + - Offset: 0x0 + Symbol: g1 + Type: R_X86_64_32S + Addend: 0xffffffffffffffff + - Offset: 0x4 + Symbol: .text + Type: R_X86_64_32S + Addend: 0x8000000000000000 +- Name: nonalloc + Type: SHT_PROGBITS + Size: 0x30 +- Name: .crelnonalloc + Type: SHT_CREL + Info: nonalloc + Link: .symtab + Relocations: + 
- Offset: 0x10 + Symbol: g1 + Type: R_X86_64_64 + Addend: 1 + - Offset: 0x20 + Symbol: g2 + Type: R_X86_64_64 + Addend: 2 + - Offset: 0x30 + Symbol: 0 + Type: R_X86_64_64 + +Symbols: + - Name: unused + Section: .text + - Name: .text + Type: STT_SECTION + Section: .text + - Name: l1 + - Name: g1 + Section: .text + Value: 0x0 + Size: 4 + Binding: STB_GLOBAL + - Name: g2 + Binding: STB_GLOBAL + +# RUN: yaml2obj --docnum=2 %s -o %t.32 +# RUN: llvm-objcopy %t.32 %t.32.out +# RUN: llvm-readobj -r %t.32.out | FileCheck %s --check-prefix=CHECK2 + +# CHECK2: Relocations [ +# CHECK2-NEXT: Section (2) .crel.text { +# CHECK2-NEXT: 0x0 R_X86_64_32S g1 0xFFFFFFFF +# CHECK2-NEXT: 0x4 R_X86_64_32S .text 0x80000000 +# CHECK2-NEXT: } +# CHECK2-NEXT: ] + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 + +Sections: +- Name: .text + Type: SHT_PROGBITS + Content: "0000000000000000" + Flags: [SHF_ALLOC] +- Name: .crel.text + Type: SHT_CREL + Info: .text + Link: .symtab + Relocations: + - Offset: 0x0 + Symbol: g1 + Type: R_X86_64_32S + Addend: 0xffffffff + - Offset: 0x4 + Symbol: .text + Type: R_X86_64_32S + Addend: 0x80000000 + +Symbols: + - Name: .text + Type: STT_SECTION + Section: .text + - Name: g1 + Section: .text + Size: 4 + Binding: STB_GLOBAL diff --git a/llvm/test/tools/llvm-objcopy/ELF/reloc-error-remove-symtab.test b/llvm/test/tools/llvm-objcopy/ELF/reloc-error-remove-symtab.test index 54820180258627..cda9b8115d0e69 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/reloc-error-remove-symtab.test +++ b/llvm/test/tools/llvm-objcopy/ELF/reloc-error-remove-symtab.test @@ -3,6 +3,9 @@ # RUN: cp %t %t3 # RUN: not llvm-strip --no-strip-all -R .symtab %t3 2>&1 >/dev/null | FileCheck %s --check-prefix=ERR2 -DINPUT=%t3 +# RUN: yaml2obj -DTYPE=SHT_CREL %s -o %t.crel +# RUN: not llvm-objcopy -R .symtab %t.crel %t2.crel 2>&1 >/dev/null | FileCheck %s --check-prefix=ERR1 -DINPUT=%t.crel + !ELF FileHeader: Class: ELFCLASS64 @@ -17,7 +20,7 @@ 
Sections: AddressAlign: 0x0000000000000010 Content: "0000000000000000" - Name: .rel.text - Type: SHT_REL + Type: [[TYPE=SHT_REL]] Link: .symtab Info: .text Relocations: @@ -40,6 +43,12 @@ Symbols: # RUN: llvm-strip --no-strip-all --allow-broken-links -R .symtab %t5 # RUN: llvm-readobj --sections %t5 | FileCheck %s --check-prefix=SECTIONS --implicit-check-not=.symtab +# RUN: llvm-objcopy --allow-broken-links -R .symtab %t.crel %t4.crel +# RUN: llvm-readobj --sections %t4.crel | FileCheck %s --check-prefix=SECTIONS --implicit-check-not=.symtab +# RUN: cp %t.crel %t5.crel +# RUN: llvm-strip --no-strip-all --allow-broken-links -R .symtab %t5.crel +# RUN: llvm-readobj --sections %t5.crel | FileCheck %s --check-prefix=SECTIONS --implicit-check-not=.symtab + # SECTIONS: Name: .rel.text # SECTIONS: Link # SECTIONS-SAME: : 0 diff --git a/llvm/test/tools/llvm-objcopy/ELF/remove-section-in-group.test b/llvm/test/tools/llvm-objcopy/ELF/remove-section-in-group.test index 9e683b9f68c939..ccc1ede0589c17 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/remove-section-in-group.test +++ b/llvm/test/tools/llvm-objcopy/ELF/remove-section-in-group.test @@ -1,6 +1,6 @@ ## This checks that the group section is shrunk when its member is removed. 
-# RUN: yaml2obj %s -o - \ +# RUN: yaml2obj --docnum=1 %s -o - \ # RUN: | llvm-objcopy -R .foo - - \ # RUN: | obj2yaml - \ # RUN: | FileCheck %s @@ -35,3 +35,51 @@ Symbols: - Name: foo_bar_grp Section: .group Binding: STB_GLOBAL + +# RUN: yaml2obj --docnum=2 %s -o %t +# RUN: llvm-objcopy --remove-section=.debug_macro %t +# RUN: llvm-readelf --section-groups %t | FileCheck %s --check-prefix=GROUP-REMOVED + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .group + Type: SHT_GROUP + Info: foo_grp + Members: + - SectionOrType: GRP_COMDAT + - SectionOrType: .debug_macro + - Name: .debug_macro + Type: SHT_PROGBITS + Flags: [ SHF_GROUP ] +Symbols: + - Name: foo_grp + Section: .group + +# GROUP-REMOVED: There are no section groups in this file. + +# RUN: yaml2obj --docnum=3 %s -o %t +# RUN: llvm-objcopy --remove-section=.group %t +# RUN: llvm-readelf --section-groups %t | FileCheck %s --check-prefix=EMPTY-GROUP-REMOVED + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .group + Type: SHT_GROUP + Info: foo_grp + Members: + - SectionOrType: GRP_COMDAT +Symbols: + - Name: foo_grp + Section: .group + +# EMPTY-GROUP-REMOVED: There are no section groups in this file. 
diff --git a/llvm/test/tools/llvm-objcopy/ELF/strip-reloc-symbol.test b/llvm/test/tools/llvm-objcopy/ELF/strip-reloc-symbol.test index 941dacce2edf29..7f2cd726b15c8f 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/strip-reloc-symbol.test +++ b/llvm/test/tools/llvm-objcopy/ELF/strip-reloc-symbol.test @@ -1,5 +1,7 @@ # RUN: yaml2obj %s -o %t # RUN: not llvm-objcopy -N foo %t %t2 2>&1 | FileCheck %s -DFILE=%t +# RUN: yaml2obj -DTYPE=SHT_CREL %s -o %t1 +# RUN: not llvm-objcopy -N foo %t1 /dev/null 2>&1 | FileCheck %s -DFILE=%t1 !ELF FileHeader: @@ -15,7 +17,7 @@ Sections: AddressAlign: 0x0000000000000010 Size: 64 - Name: .rel.text - Type: SHT_REL + Type: [[TYPE=SHT_REL]] Info: .text Relocations: - Offset: 0x1000 diff --git a/llvm/test/tools/llvm-objdump/ELF/crel.test b/llvm/test/tools/llvm-objdump/ELF/crel.test new file mode 100644 index 00000000000000..ef3c4b011135c1 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/crel.test @@ -0,0 +1,213 @@ +# RUN: yaml2obj --docnum=1 %s -o %t +# RUN: llvm-objdump -r %t | FileCheck %s --strict-whitespace --match-full-lines + +# CHECK:RELOCATION RECORDS FOR [.text]: +# CHECK-NEXT:OFFSET TYPE VALUE +# CHECK-NEXT:0000000000000001 R_X86_64_32 g1+0x1 +# CHECK-NEXT:0000000000000002 R_X86_64_64 l1+0x2 +# CHECK-NEXT:0000000000000000 R_X86_64_32S g1-0x1 +# CHECK-NEXT:0000000000000004 R_X86_64_32S .text-0x8000000000000000 +#CHECK-EMPTY: +# CHECK-NEXT:RELOCATION RECORDS FOR [nonalloc]: +# CHECK-NEXT:OFFSET TYPE VALUE +# CHECK-NEXT:0000000000000010 R_X86_64_64 g1+0x1 +# CHECK-NEXT:0000000000000020 R_X86_64_64 g2+0x2 +# CHECK-NEXT:0000000000000030 R_X86_64_64 *ABS* +# CHECK-NOT:{{.}} + +--- !ELF +FileHeader: !FileHeader + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 + +Sections: +- Name: .text + Type: SHT_PROGBITS + Content: "0000000000000000" + Flags: [SHF_ALLOC] +- Name: .crel.text + Type: SHT_CREL + Info: .text + Link: .symtab + Relocations: + - Offset: 0x1 + Symbol: g1 + Type: R_X86_64_32 + Addend: 1 + - 
Offset: 0x2 + Symbol: l1 + Type: R_X86_64_64 + Addend: 2 + - Offset: 0x0 + Symbol: g1 + Type: R_X86_64_32S + Addend: 0xffffffffffffffff + - Offset: 0x4 + Symbol: .text + Type: R_X86_64_32S + Addend: 0x8000000000000000 +- Name: nonalloc + Type: SHT_PROGBITS + Size: 0x30 +- Name: .crelnonalloc + Type: SHT_CREL + Info: nonalloc + Link: .symtab + Relocations: + - Offset: 0x10 + Symbol: g1 + Type: R_X86_64_64 + Addend: 1 + - Offset: 0x20 + Symbol: g2 + Type: R_X86_64_64 + Addend: 2 + - Offset: 0x30 + Symbol: 0 + Type: R_X86_64_64 + +Symbols: + - Name: .text + Type: STT_SECTION + Section: .text + - Name: l1 + - Name: g1 + Section: .text + Value: 0x0 + Size: 4 + Binding: STB_GLOBAL + - Name: g2 + Binding: STB_GLOBAL + +## Check relocation formatting on ELFCLASS32 as well. +# RUN: yaml2obj --docnum=2 %s > %t2 +# RUN: llvm-objdump -r %t2 | FileCheck %s --check-prefix=ELF32 --strict-whitespace --match-full-lines + +# ELF32:RELOCATION RECORDS FOR [.text]: +# ELF32-NEXT:OFFSET TYPE VALUE +# ELF32-NEXT:00000008 R_ARM_REL32 l1+0x1 +# ELF32-NEXT:00000004 R_ARM_ABS32 g1 + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2MSB + Type: ET_REL + Machine: EM_ARM +Sections: +- Name: .text + Type: SHT_PROGBITS + Size: 0x10 +- Name: .crel.text + Type: SHT_CREL + Info: .text + Link: .symtab + Relocations: + - Offset: 0x8 + Symbol: l1 + Type: R_ARM_REL32 + Addend: 1 + - Offset: 0x4 + Symbol: g1 + Type: R_ARM_ABS32 +Symbols: + - Name: l1 + - Name: g1 + Binding: STB_GLOBAL + +## Check CREL with implicit addends. 
+# RUN: yaml2obj --docnum=3 %s -o %t3 +# RUN: llvm-objdump -r %t3 | FileCheck %s --check-prefix=IMPLICIT --strict-whitespace --match-full-lines +# IMPLICIT:RELOCATION RECORDS FOR [.data]: +# IMPLICIT-NEXT:OFFSET TYPE VALUE +# IMPLICIT-NEXT:000000000000001f R_X86_64_32 g1 +# IMPLICIT-NEXT:000000000000003f R_X86_64_64 g1 +# IMPLICIT-NEXT:0000000000000000 R_X86_64_32S l1 +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS + - Name: .data + Type: SHT_PROGBITS + - Name: .crel.data + Type: SHT_CREL + Flags: [ SHF_INFO_LINK ] + Link: .symtab + Info: .data + Content: 187f030a82017787feffffffffffffff077f0a +Symbols: + - Name: .text + Type: STT_SECTION + Section: .text + - Name: l1 + Section: .text + - Name: g1 + Section: .text + Binding: STB_GLOBAL + +## Test errors. +# RUN: yaml2obj --docnum=4 %s -o %t.err +# RUN: llvm-objdump -r %t.err 2>&1 | FileCheck %s --check-prefix=ERR -DFILE=%t.err + +# ERR:RELOCATION RECORDS FOR [.data]: +# ERR-NEXT:OFFSET TYPE VALUE +# ERR-NEXT:warning: '[[FILE]]': unable to decode LEB128 at offset 0x00000000: malformed uleb128, extends past end +# ERR-NOT:{{.}} + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS + - Name: .data + Type: SHT_PROGBITS + - Name: .crel.data + Type: SHT_CREL + Flags: [] + Link: .symtab + Info: .data +Symbols: + - Name: .text + Type: STT_SECTION + Section: .text + +# RUN: yaml2obj --docnum=5 %s -o %t.err2 +# RUN: llvm-objdump -r %t.err2 2>&1 | FileCheck %s --check-prefix=ERR2 -DFILE=%t.err2 + +# ERR2:RELOCATION RECORDS FOR [.data]: +# ERR2-NEXT:OFFSET TYPE VALUE +# ERR2-NEXT:warning: '[[FILE]]': unexpected end of data at offset 0x1 while reading [0x1, 0x2) +# ERR2-NOT:{{.}} + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2MSB + Type: ET_REL + Machine: EM_ARM +Sections: + - Name: .text + Type: SHT_PROGBITS + - Name: 
.data + Type: SHT_PROGBITS + - Name: .crel.data + Type: SHT_CREL + Flags: [] + Link: .symtab + Info: .data + Content: 08 +Symbols: + - Name: .text + Type: STT_SECTION + Section: .text diff --git a/llvm/test/tools/llvm-objdump/X86/elf-disassemble-relocs.test b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-relocs.test index cce0712e8fa0d9..a6bd09ce3fa241 100644 --- a/llvm/test/tools/llvm-objdump/X86/elf-disassemble-relocs.test +++ b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-relocs.test @@ -6,6 +6,10 @@ # RUN: llvm-objdump 1.o -d -r | FileCheck %s --implicit-check-not="RELOCATION RECORDS" # RUN: llvm-objdump 1.o -r --disassemble-symbols=x2,x4 | FileCheck %s --check-prefix=CHECK2 +# RUN: llvm-mc -filetype=obj -triple=x86_64 -crel 1.s -o 1leb.o +# RUN: llvm-objdump 1leb.o -d -r | FileCheck %s --implicit-check-not="RELOCATION RECORDS" +# RUN: llvm-objdump 1leb.o -r --disassemble-symbols=x2,x4 | FileCheck %s --check-prefix=CHECK2 + #--- 1.s # CHECK: 0000000000000000 : # CHECK-NEXT: 0: e8 00 00 00 00 callq 0x5 diff --git a/llvm/tools/bugpoint/CrashDebugger.cpp b/llvm/tools/bugpoint/CrashDebugger.cpp index 0ca8fa28c4af03..e1a732f4b01921 100644 --- a/llvm/tools/bugpoint/CrashDebugger.cpp +++ b/llvm/tools/bugpoint/CrashDebugger.cpp @@ -72,7 +72,7 @@ cl::opt VerboseErrors("verbose-errors", static bool isValidModule(std::unique_ptr &M, bool ExitOnFailure = true) { - if (!llvm::verifyModule(*M.get(), &llvm::errs())) + if (!llvm::verifyModule(*M, &llvm::errs())) return true; if (ExitOnFailure) { diff --git a/llvm/tools/llvm-as/llvm-as.cpp b/llvm/tools/llvm-as/llvm-as.cpp index 9b9af762676195..f28ad00184420f 100644 --- a/llvm/tools/llvm-as/llvm-as.cpp +++ b/llvm/tools/llvm-as/llvm-as.cpp @@ -153,7 +153,7 @@ int main(int argc, char **argv) { if (!DisableVerify) { std::string ErrorStr; raw_string_ostream OS(ErrorStr); - if (verifyModule(*M.get(), &OS)) { + if (verifyModule(*M, &OS)) { errs() << argv[0] << ": assembly parsed, but does not verify as correct!\n"; errs() << 
OS.str(); @@ -163,7 +163,7 @@ int main(int argc, char **argv) { } if (DumpAsm) { - errs() << "Here's the assembly:\n" << *M.get(); + errs() << "Here's the assembly:\n" << *M; if (Index.get() && Index->begin() != Index->end()) Index->print(errs()); } diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp index 4ab3b7265f2f6a..d5f95f874ea6f5 100644 --- a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp +++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp @@ -552,6 +552,38 @@ static Error loadNewSectionData(StringRef ArgValue, StringRef OptionName, return Error::success(); } +static Expected parseChangeSectionLMA(StringRef ArgValue, + StringRef OptionName) { + StringRef StringValue; + if (ArgValue.starts_with("*+")) { + StringValue = ArgValue.slice(2, StringRef::npos); + } else if (ArgValue.starts_with("*-")) { + StringValue = ArgValue.slice(1, StringRef::npos); + } else if (ArgValue.contains("=")) { + return createStringError(errc::invalid_argument, + "bad format for " + OptionName + + ": changing LMA to a specific value is not " + "supported. Use *+val or *-val instead"); + } else if (ArgValue.contains("+") || ArgValue.contains("-")) { + return createStringError(errc::invalid_argument, + "bad format for " + OptionName + + ": changing a specific section LMA is not " + "supported. Use *+val or *-val instead"); + } + if (StringValue.empty()) + return createStringError(errc::invalid_argument, + "bad format for " + OptionName + + ": missing LMA offset"); + + auto LMAValue = getAsInteger(StringValue); + if (!LMAValue) + return createStringError(LMAValue.getError(), + "bad format for " + OptionName + ": value after " + + ArgValue.slice(0, 2) + " is " + StringValue + + " when it should be an integer"); + return *LMAValue; +} + // parseObjcopyOptions returns the config and sets the input arguments. If a // help flag is set then parseObjcopyOptions will print the help messege and // exit. 
@@ -833,6 +865,14 @@ objcopy::parseObjcopyOptions(ArrayRef RawArgsArr, Config.PadTo = *Addr; } + if (const auto *Arg = InputArgs.getLastArg(OBJCOPY_change_section_lma)) { + Expected LMAValue = + parseChangeSectionLMA(Arg->getValue(), Arg->getSpelling()); + if (!LMAValue) + return LMAValue.takeError(); + Config.ChangeSectionLMAValAll = *LMAValue; + } + for (auto *Arg : InputArgs.filtered(OBJCOPY_redefine_symbol)) { if (!StringRef(Arg->getValue()).contains('=')) return createStringError(errc::invalid_argument, diff --git a/llvm/tools/llvm-objcopy/ObjcopyOpts.td b/llvm/tools/llvm-objcopy/ObjcopyOpts.td index 4bc80eba05f8e6..f3d14abee43bdf 100644 --- a/llvm/tools/llvm-objcopy/ObjcopyOpts.td +++ b/llvm/tools/llvm-objcopy/ObjcopyOpts.td @@ -254,6 +254,10 @@ def adjust_start : JoinedOrSeparate<["--"], "adjust-start">, Alias, HelpText<"Alias for --change-start">; +defm change_section_lma + : Eq<"change-section-lma", "Shift LMA of non-zero-sized sections in the program header by ">, + MetaVarName<"*{+|-}val">; + defm add_symbol : Eq<"add-symbol", "Add new symbol to .symtab. Accepted flags: " "global, local, weak, default, hidden, protected, file, section, object, " diff --git a/llvm/tools/llvm-objdump/ELFDump.cpp b/llvm/tools/llvm-objdump/ELFDump.cpp index 8c184fc1fbb66a..5ac13495662faf 100644 --- a/llvm/tools/llvm-objdump/ELFDump.cpp +++ b/llvm/tools/llvm-objdump/ELFDump.cpp @@ -104,7 +104,11 @@ static Error getRelocationValueString(const ELFObjectFile *Obj, // In SHT_REL case we would need to read the addend from section data. // GNU objdump does not do that and we just follow for simplicity atm. 
bool Undef = false; - if ((*SecOrErr)->sh_type == ELF::SHT_RELA) { + if ((*SecOrErr)->sh_type == ELF::SHT_CREL) { + auto ERela = Obj->getCrel(Rel); + Addend = ERela.r_addend; + Undef = ERela.getSymbol(false) == 0; + } else if ((*SecOrErr)->sh_type == ELF::SHT_RELA) { const typename ELFT::Rela *ERela = Obj->getRela(Rel); Addend = ERela->r_addend; Undef = ERela->getSymbol(false) == 0; diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp index 749f9882017577..0544fc4f406bb0 100644 --- a/llvm/tools/llvm-objdump/MachODump.cpp +++ b/llvm/tools/llvm-objdump/MachODump.cpp @@ -7179,7 +7179,7 @@ objdump::getMachODSymObject(const MachOObjectFile *MachOOF, StringRef Filename, DSYMBuf = std::move(BufOrErr.get()); Expected> BinaryOrErr = - createBinary(DSYMBuf.get()->getMemBufferRef()); + createBinary(DSYMBuf->getMemBufferRef()); if (!BinaryOrErr) { reportError(BinaryOrErr.takeError(), DSYMPath); return nullptr; diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 6249be4f332b78..d1240025625ca4 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -2687,6 +2687,16 @@ void Dumper::printRelocations() { << "VALUE\n"; for (SectionRef Section : P.second) { + // CREL sections require decoding, each section may have its own specific + // decode problems. 
+ if (O.isELF() && ELFSectionRef(Section).getType() == ELF::SHT_CREL) { + StringRef Err = + cast(O).getCrelDecodeProblem(Section); + if (!Err.empty()) { + reportUniqueWarning(Err); + continue; + } + } for (const RelocationRef &Reloc : Section.relocations()) { uint64_t Address = Reloc.getOffset(); SmallString<32> RelocName; diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index a7e506d32ac2ee..632ddc7b50f54a 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -132,7 +132,7 @@ void BinarySizeContextTracker::trackInlineesOptimizedAway( MCPseudoProbeDecoder &ProbeDecoder) { ProbeFrameStack ProbeContext; for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) - trackInlineesOptimizedAway(ProbeDecoder, *Child.second.get(), ProbeContext); + trackInlineesOptimizedAway(ProbeDecoder, *Child.second, ProbeContext); } void BinarySizeContextTracker::trackInlineesOptimizedAway( @@ -161,8 +161,7 @@ void BinarySizeContextTracker::trackInlineesOptimizedAway( for (const auto &ChildNode : ProbeNode.getChildren()) { InlineSite Location = ChildNode.first; ProbeContext.back().second = std::get<1>(Location); - trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second.get(), - ProbeContext); + trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second, ProbeContext); } ProbeContext.pop_back(); @@ -527,7 +526,7 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef Bytes, outs() << format("%8" PRIx64 ":", Address); size_t Start = outs().tell(); if (Disassembled) - IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), outs()); + IPrinter->printInst(&Inst, Address + Size, "", *STI, outs()); else outs() << "\t"; if (ShowSourceLocations) { @@ -845,7 +844,7 @@ void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) { exitWithError("Error creating the debug info context", Path); for (const auto &CompilationUnit : DebugContext->compile_units()) - 
loadSymbolsFromDWARFUnit(*CompilationUnit.get()); + loadSymbolsFromDWARFUnit(*CompilationUnit); // Handles DWO sections that can either be in .o, .dwo or .dwp files. uint32_t NumOfDWOMissing = 0; diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp index 9ac324cc672f05..15d838617063b2 100644 --- a/llvm/tools/llvm-readobj/llvm-readobj.cpp +++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp @@ -700,14 +700,14 @@ int llvm_readobj_main(int argc, char **argv, const llvm::ToolContext &) { std::unique_ptr Writer = createWriter(); for (const std::string &I : opts::InputFilenames) - dumpInput(I, *Writer.get()); + dumpInput(I, *Writer); if (opts::CodeViewMergedTypes) { if (opts::CodeViewEnableGHash) - dumpCodeViewMergedTypes(*Writer.get(), CVTypes.GlobalIDTable.records(), + dumpCodeViewMergedTypes(*Writer, CVTypes.GlobalIDTable.records(), CVTypes.GlobalTypeTable.records()); else - dumpCodeViewMergedTypes(*Writer.get(), CVTypes.IDTable.records(), + dumpCodeViewMergedTypes(*Writer, CVTypes.IDTable.records(), CVTypes.TypeTable.records()); } diff --git a/llvm/unittests/CMakeLists.txt b/llvm/unittests/CMakeLists.txt index 696926110df7a5..49ed6c8fb6c42f 100644 --- a/llvm/unittests/CMakeLists.txt +++ b/llvm/unittests/CMakeLists.txt @@ -21,6 +21,7 @@ add_subdirectory(BinaryFormat) add_subdirectory(Bitcode) add_subdirectory(Bitstream) add_subdirectory(CodeGen) +add_subdirectory(CodeGenData) add_subdirectory(DebugInfo) add_subdirectory(Debuginfod) add_subdirectory(Demangle) diff --git a/llvm/unittests/CodeGen/GlobalISel/GISelMITest.h b/llvm/unittests/CodeGen/GlobalISel/GISelMITest.h index 06786b15252a0e..fd31e95cce13d9 100644 --- a/llvm/unittests/CodeGen/GlobalISel/GISelMITest.h +++ b/llvm/unittests/CodeGen/GlobalISel/GISelMITest.h @@ -184,8 +184,7 @@ static inline bool CheckMachineFunction(const MachineFunction &MF, SmallString<4096> CheckFileBuffer; FileCheckRequest Req; FileCheck FC(Req); - StringRef CheckFileText = - 
FC.CanonicalizeFile(*CheckBuf.get(), CheckFileBuffer); + StringRef CheckFileText = FC.CanonicalizeFile(*CheckBuf, CheckFileBuffer); SourceMgr SM; SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(CheckFileText, "CheckFile"), SMLoc()); diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp index 831d7e6292e335..0932938b209a4b 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -610,10 +610,10 @@ TEST_F(AArch64GISelMITest, WidenBitCountingCTLZZeroUndef) { auto CheckStr = R"( CHECK: [[Trunc:%[0-9]+]]:_(s8) = G_TRUNC CHECK: [[Zext:%[0-9]+]]:_(s16) = G_ZEXT [[Trunc]] - CHECK: [[CtlzZu:%[0-9]+]]:_(s16) = G_CTLZ_ZERO_UNDEF [[Zext]] CHECK: [[Cst8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - CHECK: [[Sub:%[0-9]+]]:_(s16) = G_SUB [[CtlzZu]]:_, [[Cst8]]:_ - CHECK: [[Trunc:%[0-9]+]]:_(s8) = G_TRUNC [[Sub]] + CHECK: [[Shl:%[0-9]+]]:_(s16) = G_SHL [[Zext]]:_, [[Cst8]]:_ + CHECK: [[CtlzZu:%[0-9]+]]:_(s16) = G_CTLZ_ZERO_UNDEF [[Shl]] + CHECK: [[Trunc:%[0-9]+]]:_(s8) = G_TRUNC [[CtlzZu]] )"; // Check diff --git a/llvm/unittests/CodeGenData/CMakeLists.txt b/llvm/unittests/CodeGenData/CMakeLists.txt new file mode 100644 index 00000000000000..3d821b87e29d8c --- /dev/null +++ b/llvm/unittests/CodeGenData/CMakeLists.txt @@ -0,0 +1,14 @@ +set(LLVM_LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + CodeGen + CodeGenData + Core + Support + ) + +add_llvm_unittest(CodeGenDataTests + OutlinedHashTreeRecordTest.cpp + OutlinedHashTreeTest.cpp + ) + +target_link_libraries(CodeGenDataTests PRIVATE LLVMTestingSupport) diff --git a/llvm/unittests/CodeGenData/OutlinedHashTreeRecordTest.cpp b/llvm/unittests/CodeGenData/OutlinedHashTreeRecordTest.cpp new file mode 100644 index 00000000000000..aa7ad4a33754ff --- /dev/null +++ b/llvm/unittests/CodeGenData/OutlinedHashTreeRecordTest.cpp @@ -0,0 +1,118 @@ +//===- OutlinedHashTreeRecordTest.cpp 
-------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGenData/OutlinedHashTreeRecord.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +TEST(OutlinedHashTreeRecordTest, Empty) { + OutlinedHashTreeRecord HashTreeRecord; + ASSERT_TRUE(HashTreeRecord.empty()); +} + +TEST(OutlinedHashTreeRecordTest, Print) { + OutlinedHashTreeRecord HashTreeRecord; + HashTreeRecord.HashTree->insert({{1, 2}, 3}); + + const char *ExpectedTreeStr = R"(--- +0: + Hash: 0x0 + Terminals: 0 + SuccessorIds: [ 1 ] +1: + Hash: 0x1 + Terminals: 0 + SuccessorIds: [ 2 ] +2: + Hash: 0x2 + Terminals: 3 + SuccessorIds: [ ] +... +)"; + std::string TreeDump; + raw_string_ostream OS(TreeDump); + HashTreeRecord.print(OS); + EXPECT_EQ(ExpectedTreeStr, TreeDump); +} + +TEST(OutlinedHashTreeRecordTest, Stable) { + OutlinedHashTreeRecord HashTreeRecord1; + HashTreeRecord1.HashTree->insert({{1, 2}, 4}); + HashTreeRecord1.HashTree->insert({{1, 3}, 5}); + + OutlinedHashTreeRecord HashTreeRecord2; + HashTreeRecord2.HashTree->insert({{1, 3}, 5}); + HashTreeRecord2.HashTree->insert({{1, 2}, 4}); + + // Output is stable regardless of insertion order. + std::string TreeDump1; + raw_string_ostream OS1(TreeDump1); + HashTreeRecord1.print(OS1); + std::string TreeDump2; + raw_string_ostream OS2(TreeDump2); + HashTreeRecord2.print(OS2); + + EXPECT_EQ(TreeDump1, TreeDump2); +} + +TEST(OutlinedHashTreeRecordTest, Serialize) { + OutlinedHashTreeRecord HashTreeRecord1; + HashTreeRecord1.HashTree->insert({{1, 2}, 4}); + HashTreeRecord1.HashTree->insert({{1, 3}, 5}); + + // Serialize and deserialize the tree. 
+ SmallVector Out; + raw_svector_ostream OS(Out); + HashTreeRecord1.serialize(OS); + + OutlinedHashTreeRecord HashTreeRecord2; + const uint8_t *Data = reinterpret_cast(Out.data()); + HashTreeRecord2.deserialize(Data); + + // Two trees should be identical. + std::string TreeDump1; + raw_string_ostream OS1(TreeDump1); + HashTreeRecord1.print(OS1); + std::string TreeDump2; + raw_string_ostream OS2(TreeDump2); + HashTreeRecord2.print(OS2); + + EXPECT_EQ(TreeDump1, TreeDump2); +} + +TEST(OutlinedHashTreeRecordTest, SerializeYAML) { + OutlinedHashTreeRecord HashTreeRecord1; + HashTreeRecord1.HashTree->insert({{1, 2}, 4}); + HashTreeRecord1.HashTree->insert({{1, 3}, 5}); + + // Serialize and deserialize the tree in a YAML format. + std::string Out; + raw_string_ostream OS(Out); + yaml::Output YOS(OS); + HashTreeRecord1.serializeYAML(YOS); + + OutlinedHashTreeRecord HashTreeRecord2; + yaml::Input YIS(StringRef(Out.data(), Out.size())); + HashTreeRecord2.deserializeYAML(YIS); + + // Two trees should be identical. + std::string TreeDump1; + raw_string_ostream OS1(TreeDump1); + HashTreeRecord1.print(OS1); + std::string TreeDump2; + raw_string_ostream OS2(TreeDump2); + HashTreeRecord2.print(OS2); + + EXPECT_EQ(TreeDump1, TreeDump2); +} + +} // end namespace diff --git a/llvm/unittests/CodeGenData/OutlinedHashTreeTest.cpp b/llvm/unittests/CodeGenData/OutlinedHashTreeTest.cpp new file mode 100644 index 00000000000000..637ab3cd08c1ce --- /dev/null +++ b/llvm/unittests/CodeGenData/OutlinedHashTreeTest.cpp @@ -0,0 +1,82 @@ +//===- OutlinedHashTreeTest.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGenData/OutlinedHashTree.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +TEST(OutlinedHashTreeTest, Empty) { + OutlinedHashTree HashTree; + EXPECT_TRUE(HashTree.empty()); + // The header node is always present. + EXPECT_EQ(HashTree.size(), 1u); + EXPECT_EQ(HashTree.depth(), 0u); +} + +TEST(OutlinedHashTreeTest, Insert) { + OutlinedHashTree HashTree; + HashTree.insert({{1, 2, 3}, 1}); + // The node count is 4 (including the root node). + EXPECT_EQ(HashTree.size(), 4u); + // The terminal count is 1. + EXPECT_EQ(HashTree.size(/*GetTerminalCountOnly=*/true), 1u); + // The depth is 3. + EXPECT_EQ(HashTree.depth(), 3u); + + HashTree.clear(); + EXPECT_TRUE(HashTree.empty()); + + HashTree.insert({{1, 2, 3}, 1}); + HashTree.insert({{1, 2, 4}, 2}); + // The nodes of 1 and 2 are shared with the same prefix. + // The nodes are root, 1, 2, 3 and 4, whose counts are 5. + EXPECT_EQ(HashTree.size(), 5u); +} + +TEST(OutlinedHashTreeTest, Find) { + OutlinedHashTree HashTree; + HashTree.insert({{1, 2, 3}, 1}); + HashTree.insert({{1, 2, 3}, 2}); + + // The node count does not change as the same sequences are added. + EXPECT_EQ(HashTree.size(), 4u); + // The terminal counts are accumulated from two same sequences. + EXPECT_TRUE(HashTree.find({1, 2, 3})); + EXPECT_EQ(HashTree.find({1, 2, 3}).value(), 3u); + EXPECT_FALSE(HashTree.find({1, 2})); +} + +TEST(OutlinedHashTreeTest, Merge) { + // Build HashTree1 inserting 2 sequences. + OutlinedHashTree HashTree1; + + HashTree1.insert({{1, 2}, 20}); + HashTree1.insert({{1, 4}, 30}); + + // Build HashTree2 and HashTree3 for each + OutlinedHashTree HashTree2; + HashTree2.insert({{1, 2}, 20}); + OutlinedHashTree HashTree3; + HashTree3.insert({{1, 4}, 30}); + + // Merge HashTree3 into HashTree2. 
+ HashTree2.merge(&HashTree3); + + // Compare HashTree1 and HashTree2. + EXPECT_EQ(HashTree1.size(), HashTree2.size()); + EXPECT_EQ(HashTree1.depth(), HashTree2.depth()); + EXPECT_EQ(HashTree1.find({1, 2}), HashTree2.find({1, 2})); + EXPECT_EQ(HashTree1.find({1, 4}), HashTree2.find({1, 4})); + EXPECT_EQ(HashTree1.find({1, 3}), HashTree2.find({1, 3})); +} + +} // end namespace diff --git a/llvm/unittests/DebugInfo/BTF/BTFParserTest.cpp b/llvm/unittests/DebugInfo/BTF/BTFParserTest.cpp index 7b4f7939e67830..5b203adfeb284f 100644 --- a/llvm/unittests/DebugInfo/BTF/BTFParserTest.cpp +++ b/llvm/unittests/DebugInfo/BTF/BTFParserTest.cpp @@ -147,7 +147,7 @@ struct MockData1 { Obj = yaml::yaml2ObjectFile(Storage, Buffer, [](const Twine &Err) { errs() << Err; }); - return *Obj.get(); + return *Obj; } }; @@ -521,7 +521,7 @@ class MockData2 { Obj = yaml::yaml2ObjectFile(ObjStorage, YamlBuffer, [](const Twine &Err) { errs() << Err; }); - return *Obj.get(); + return *Obj; } }; diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp index 166a62ad2daa31..43fdf5d3d6f319 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp @@ -101,7 +101,7 @@ DWARFExpressionCopyBytesTest::createStreamer(raw_pwrite_stream &OS) { Res.Ctx = std::make_unique(Triple(TripleName), MAI.get(), MRI.get(), /*MSTI=*/nullptr); - Res.MOFI.reset(TheTarget->createMCObjectFileInfo(*Res.Ctx.get(), + Res.MOFI.reset(TheTarget->createMCObjectFileInfo(*Res.Ctx, /*PIC=*/false)); Res.Ctx->setObjectFileInfo(Res.MOFI.get()); diff --git a/llvm/unittests/IR/MetadataTest.cpp b/llvm/unittests/IR/MetadataTest.cpp index 3f766a414f08f2..17573ca57e0874 100644 --- a/llvm/unittests/IR/MetadataTest.cpp +++ b/llvm/unittests/IR/MetadataTest.cpp @@ -3870,6 +3870,85 @@ TEST_F(DIExpressionTest, createFragmentExpression) { #undef EXPECT_INVALID_FRAGMENT } 
+TEST_F(DIExpressionTest, extractLeadingOffset) { + int64_t Offset; + SmallVector Remaining; + using namespace dwarf; +#define OPS(...) SmallVector(ArrayRef{__VA_ARGS__}) +#define EXTRACT_FROM(...) \ + DIExpression::get(Context, {__VA_ARGS__}) \ + ->extractLeadingOffset(Offset, Remaining) + // Test the number of expression inputs + // ------------------------------------ + // + // Single location expressions are permitted. + EXPECT_TRUE(EXTRACT_FROM(DW_OP_plus_uconst, 2)); + EXPECT_EQ(Offset, 2); + EXPECT_EQ(Remaining.size(), 0u); + // This is also a single-location. + EXPECT_TRUE(EXTRACT_FROM(DW_OP_LLVM_arg, 0, DW_OP_plus_uconst, 2)); + EXPECT_EQ(Offset, 2); + EXPECT_EQ(Remaining.size(), 0u); + // Variadic locations are not permitted. A non-zero arg is assumed to + // indicate multiple inputs. + EXPECT_FALSE(EXTRACT_FROM(DW_OP_LLVM_arg, 1)); + EXPECT_FALSE(EXTRACT_FROM(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus)); + + // Test offsets expressions + // ------------------------ + EXPECT_TRUE(EXTRACT_FROM()); + EXPECT_EQ(Offset, 0); + EXPECT_EQ(Remaining.size(), 0u); + + EXPECT_TRUE(EXTRACT_FROM(DW_OP_constu, 4, DW_OP_plus)); + EXPECT_EQ(Offset, 4); + EXPECT_EQ(Remaining.size(), 0u); + + EXPECT_TRUE(EXTRACT_FROM(DW_OP_constu, 2, DW_OP_minus)); + EXPECT_EQ(Offset, -2); + EXPECT_EQ(Remaining.size(), 0u); + + EXPECT_TRUE(EXTRACT_FROM(DW_OP_plus_uconst, 8)); + EXPECT_EQ(Offset, 8); + EXPECT_EQ(Remaining.size(), 0u); + + EXPECT_TRUE(EXTRACT_FROM(DW_OP_plus_uconst, 4, DW_OP_constu, 2, DW_OP_minus)); + EXPECT_EQ(Offset, 2); + EXPECT_EQ(Remaining.size(), 0u); + + // Not all operations are permitted for simplicity. Can be added + // if needed in future. 
+ EXPECT_FALSE(EXTRACT_FROM(DW_OP_constu, 2, DW_OP_mul)); + + // Test "remaining ops" + // -------------------- + EXPECT_TRUE(EXTRACT_FROM(DW_OP_plus_uconst, 4, DW_OP_constu, 8, DW_OP_minus, + DW_OP_LLVM_fragment, 0, 32)); + EXPECT_EQ(Remaining, OPS(DW_OP_LLVM_fragment, 0, 32)); + EXPECT_EQ(Offset, -4); + + EXPECT_TRUE(EXTRACT_FROM(DW_OP_deref)); + EXPECT_EQ(Remaining, OPS(DW_OP_deref)); + EXPECT_EQ(Offset, 0); + + // Check things after the non-offset ops are added too. + EXPECT_TRUE(EXTRACT_FROM(DW_OP_plus_uconst, 2, DW_OP_deref_size, 4, + DW_OP_stack_value)); + EXPECT_EQ(Remaining, OPS(DW_OP_deref_size, 4, DW_OP_stack_value)); + EXPECT_EQ(Offset, 2); + + // DW_OP_deref_type isn't supported in LLVM so this currently fails. + EXPECT_FALSE(EXTRACT_FROM(DW_OP_deref_type, 0)); + + EXPECT_TRUE(EXTRACT_FROM(DW_OP_LLVM_extract_bits_zext, 0, 8)); + EXPECT_EQ(Remaining, OPS(DW_OP_LLVM_extract_bits_zext, 0, 8)); + + EXPECT_TRUE(EXTRACT_FROM(DW_OP_LLVM_extract_bits_sext, 4, 4)); + EXPECT_EQ(Remaining, OPS(DW_OP_LLVM_extract_bits_sext, 4, 4)); +#undef EXTRACT_FROM +#undef OPS +} + TEST_F(DIExpressionTest, convertToUndefExpression) { #define EXPECT_UNDEF_OPS_EQUAL(TestExpr, Expected) \ do { \ diff --git a/llvm/unittests/IR/VFABIDemanglerTest.cpp b/llvm/unittests/IR/VFABIDemanglerTest.cpp index d7485217951c42..a9dd93a0c2b1b9 100644 --- a/llvm/unittests/IR/VFABIDemanglerTest.cpp +++ b/llvm/unittests/IR/VFABIDemanglerTest.cpp @@ -43,7 +43,7 @@ class VFABIParserTest : public ::testing::Test { M = parseAssemblyString("declare void @dummy()", Err, Ctx); EXPECT_NE(M.get(), nullptr) << "Loading an invalid module.\n " << Err.getMessage() << "\n"; - Type *Ty = parseType(ScalarFTyStr, Err, *(M.get())); + Type *Ty = parseType(ScalarFTyStr, Err, *(M)); ScalarFTy = dyn_cast(Ty); EXPECT_NE(ScalarFTy, nullptr) << "Invalid function type string: " << ScalarFTyStr << "\n" diff --git a/llvm/unittests/Linker/LinkModulesTest.cpp b/llvm/unittests/Linker/LinkModulesTest.cpp index 
884e20e89c5c2f..21c3f0ecf4bc15 100644 --- a/llvm/unittests/Linker/LinkModulesTest.cpp +++ b/llvm/unittests/Linker/LinkModulesTest.cpp @@ -40,8 +40,9 @@ class LinkModuleTest : public testing::Test { AT = ArrayType::get(PointerType::getUnqual(Ctx), 3); - GV = new GlobalVariable(*M.get(), AT, false /*=isConstant*/, - GlobalValue::InternalLinkage, nullptr,"switch.bas"); + GV = + new GlobalVariable(*M, AT, false /*=isConstant*/, + GlobalValue::InternalLinkage, nullptr, "switch.bas"); // Global Initializer std::vector Init; diff --git a/llvm/unittests/MC/AMDGPU/CMakeLists.txt b/llvm/unittests/MC/AMDGPU/CMakeLists.txt index 0a399772e019e7..029f814a7d510e 100644 --- a/llvm/unittests/MC/AMDGPU/CMakeLists.txt +++ b/llvm/unittests/MC/AMDGPU/CMakeLists.txt @@ -6,12 +6,15 @@ include_directories( set(LLVM_LINK_COMPONENTS AMDGPUCodeGen AMDGPUDesc + AMDGPUDisassembler AMDGPUInfo MC + MCDisassembler Support TargetParser ) add_llvm_unittest(AMDGPUMCTests + Disassembler.cpp DwarfRegMappings.cpp ) diff --git a/llvm/unittests/MC/AMDGPU/Disassembler.cpp b/llvm/unittests/MC/AMDGPU/Disassembler.cpp new file mode 100644 index 00000000000000..23413030136ab5 --- /dev/null +++ b/llvm/unittests/MC/AMDGPU/Disassembler.cpp @@ -0,0 +1,207 @@ +//===- llvm/unittest/unittests/MC/AMDGPU/Disassembler.cpp -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Disassembler.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCSymbolizer.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "gtest/gtest.h" + +using namespace llvm; + +static const char *symbolLookupCallback(void *DisInfo, uint64_t ReferenceValue, + uint64_t *ReferenceType, + uint64_t ReferencePC, + const char **ReferenceName) { + *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; + return nullptr; +} + +static const char *TripleName = "amdgcn--amdpal"; +static const char *CPUName = "gfx1030"; + +// Basic smoke test. +TEST(AMDGPUDisassembler, Basic) { + LLVMInitializeAMDGPUTargetInfo(); + LLVMInitializeAMDGPUTargetMC(); + LLVMInitializeAMDGPUDisassembler(); + + uint8_t Bytes[] = {0x04, 0x00, 0x80, 0xb0}; + uint8_t *BytesP = Bytes; + const char OutStringSize = 100; + char OutString[OutStringSize]; + LLVMDisasmContextRef DCR = LLVMCreateDisasmCPU( + TripleName, CPUName, nullptr, 0, nullptr, symbolLookupCallback); + + // Skip test if AMDGPU not built. + if (!DCR) + GTEST_SKIP(); + + size_t InstSize; + unsigned NumBytes = sizeof(Bytes); + unsigned PC = 0U; + + InstSize = LLVMDisasmInstruction(DCR, BytesP, NumBytes, PC, OutString, + OutStringSize); + EXPECT_EQ(InstSize, 4U); + EXPECT_EQ(StringRef(OutString), "\ts_version UC_VERSION_GFX10"); + + LLVMDisasmDispose(DCR); +} + +// Check multiple disassemblers in same MCContext. 
+TEST(AMDGPUDisassembler, MultiDisassembler) { + LLVMInitializeAMDGPUTargetInfo(); + LLVMInitializeAMDGPUTargetMC(); + LLVMInitializeAMDGPUDisassembler(); + + std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); + + // Skip test if AMDGPU not built. + if (!TheTarget) + GTEST_SKIP(); + + std::unique_ptr MRI(TheTarget->createMCRegInfo(TripleName)); + std::unique_ptr MAI( + TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); + std::unique_ptr MII(TheTarget->createMCInstrInfo()); + std::unique_ptr STI( + TheTarget->createMCSubtargetInfo(TripleName, CPUName, "")); + auto Ctx = std::make_unique(Triple(TripleName), MAI.get(), + MRI.get(), STI.get()); + + int AsmPrinterVariant = MAI->getAssemblerDialect(); + std::unique_ptr IP(TheTarget->createMCInstPrinter( + Triple(TripleName), AsmPrinterVariant, *MAI, *MII, *MRI)); + + SmallVector InsnStr, AnnoStr; + raw_svector_ostream OS(InsnStr); + raw_svector_ostream Annotations(AnnoStr); + formatted_raw_ostream FormattedOS(OS); + + char StrBuffer[128]; + + uint8_t Bytes[] = {0x04, 0x00, 0x80, 0xb0}; + uint64_t InstSize = 0U; + MCInst Inst1, Inst2; + MCDisassembler::DecodeStatus Status; + + // Test disassembler works as expected. + AnnoStr.clear(); + InsnStr.clear(); + std::unique_ptr DisAsm1( + TheTarget->createMCDisassembler(*STI, *Ctx)); + Status = DisAsm1->getInstruction(Inst1, InstSize, Bytes, 0, Annotations); + ASSERT_TRUE(Status == MCDisassembler::Success); + EXPECT_EQ(InstSize, 4U); + + IP->printInst(&Inst1, 0U, Annotations.str(), *STI, FormattedOS); + ASSERT_TRUE(InsnStr.size() < (sizeof(StrBuffer) - 1)); + std::memcpy(StrBuffer, InsnStr.data(), InsnStr.size()); + StrBuffer[InsnStr.size()] = '\0'; + EXPECT_EQ(StringRef(StrBuffer), "\ts_version UC_VERSION_GFX10"); + + // Test that second disassembler in same context works as expected. 
+ AnnoStr.clear(); + InsnStr.clear(); + std::unique_ptr DisAsm2( + TheTarget->createMCDisassembler(*STI, *Ctx)); + Status = DisAsm2->getInstruction(Inst2, InstSize, Bytes, 0, Annotations); + ASSERT_TRUE(Status == MCDisassembler::Success); + EXPECT_EQ(InstSize, 4U); + + IP->printInst(&Inst2, 0U, Annotations.str(), *STI, FormattedOS); + ASSERT_TRUE(InsnStr.size() < (sizeof(StrBuffer) - 1)); + std::memcpy(StrBuffer, InsnStr.data(), InsnStr.size()); + StrBuffer[InsnStr.size()] = '\0'; + EXPECT_EQ(StringRef(StrBuffer), "\ts_version UC_VERSION_GFX10"); +} + +// Test UC_VERSION symbols can be overriden without crashing. +// There is no valid behaviour if symbols are redefined in this way. +TEST(AMDGPUDisassembler, UCVersionOverride) { + LLVMInitializeAMDGPUTargetInfo(); + LLVMInitializeAMDGPUTargetMC(); + LLVMInitializeAMDGPUDisassembler(); + + std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); + + // Skip test if AMDGPU not built. + if (!TheTarget) + GTEST_SKIP(); + + std::unique_ptr MRI(TheTarget->createMCRegInfo(TripleName)); + std::unique_ptr MAI( + TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); + std::unique_ptr MII(TheTarget->createMCInstrInfo()); + std::unique_ptr STI( + TheTarget->createMCSubtargetInfo(TripleName, CPUName, "")); + auto Ctx = std::make_unique(Triple(TripleName), MAI.get(), + MRI.get(), STI.get()); + + // Define custom UC_VERSION before initializing disassembler. 
+ const uint8_t UC_VERSION_GFX10_DEFAULT = 0x04; + const uint8_t UC_VERSION_GFX10_NEW = 0x99; + auto Sym = Ctx->getOrCreateSymbol("UC_VERSION_GFX10"); + Sym->setVariableValue(MCConstantExpr::create(UC_VERSION_GFX10_NEW, *Ctx)); + + int AsmPrinterVariant = MAI->getAssemblerDialect(); + std::unique_ptr IP(TheTarget->createMCInstPrinter( + Triple(TripleName), AsmPrinterVariant, *MAI, *MII, *MRI)); + + testing::internal::CaptureStderr(); + std::unique_ptr DisAsm( + TheTarget->createMCDisassembler(*STI, *Ctx)); + std::string Output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(Output.find(":0: warning: unsupported redefinition of " + "UC_VERSION_GFX10") != std::string::npos); + + SmallVector InsnStr, AnnoStr; + raw_svector_ostream OS(InsnStr); + raw_svector_ostream Annotations(AnnoStr); + formatted_raw_ostream FormattedOS(OS); + + char StrBuffer[128]; + + // Decode S_VERSION instruction with original or custom version. + uint8_t Versions[] = {UC_VERSION_GFX10_DEFAULT, UC_VERSION_GFX10_NEW}; + for (uint8_t Version : Versions) { + uint8_t Bytes[] = {Version, 0x00, 0x80, 0xb0}; + uint64_t InstSize = 0U; + MCInst Inst; + + AnnoStr.clear(); + InsnStr.clear(); + MCDisassembler::DecodeStatus Status = + DisAsm->getInstruction(Inst, InstSize, Bytes, 0, Annotations); + ASSERT_TRUE(Status == MCDisassembler::Success); + EXPECT_EQ(InstSize, 4U); + + IP->printInst(&Inst, 0, Annotations.str(), *STI, FormattedOS); + ASSERT_TRUE(InsnStr.size() < (sizeof(StrBuffer) - 1)); + std::memcpy(StrBuffer, InsnStr.data(), InsnStr.size()); + StrBuffer[InsnStr.size()] = '\0'; + + if (Version == UC_VERSION_GFX10_DEFAULT) + EXPECT_EQ(StringRef(StrBuffer), "\ts_version UC_VERSION_GFX10"); + else + EXPECT_EQ(StringRef(StrBuffer), "\ts_version 153"); + } +} diff --git a/llvm/unittests/MC/DwarfLineTableHeaders.cpp b/llvm/unittests/MC/DwarfLineTableHeaders.cpp index 691d319f540989..d8a657ed5048e0 100644 --- a/llvm/unittests/MC/DwarfLineTableHeaders.cpp +++ 
b/llvm/unittests/MC/DwarfLineTableHeaders.cpp @@ -72,7 +72,7 @@ class DwarfLineTableHeaders : public ::testing::Test { Res.Ctx = std::make_unique(Triple(TripleName), MAI.get(), MRI.get(), /*MSTI=*/nullptr); - Res.MOFI.reset(TheTarget->createMCObjectFileInfo(*Res.Ctx.get(), + Res.MOFI.reset(TheTarget->createMCObjectFileInfo(*Res.Ctx, /*PIC=*/false)); Res.Ctx->setObjectFileInfo(Res.MOFI.get()); diff --git a/llvm/unittests/MIR/MachineMetadata.cpp b/llvm/unittests/MIR/MachineMetadata.cpp index 63fad2d2effec5..364ab187c2858a 100644 --- a/llvm/unittests/MIR/MachineMetadata.cpp +++ b/llvm/unittests/MIR/MachineMetadata.cpp @@ -188,8 +188,7 @@ static bool checkOutput(std::string CheckString, std::string Output) { SmallString<4096> CheckFileBuffer; FileCheckRequest Req; FileCheck FC(Req); - StringRef CheckFileText = - FC.CanonicalizeFile(*CheckBuffer.get(), CheckFileBuffer); + StringRef CheckFileText = FC.CanonicalizeFile(*CheckBuffer, CheckFileBuffer); SourceMgr SM; SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(CheckFileText, "CheckFile"), diff --git a/llvm/unittests/Target/X86/MachineSizeOptsTest.cpp b/llvm/unittests/Target/X86/MachineSizeOptsTest.cpp index e072844ca0106f..f4dab399803d14 100644 --- a/llvm/unittests/Target/X86/MachineSizeOptsTest.cpp +++ b/llvm/unittests/Target/X86/MachineSizeOptsTest.cpp @@ -78,7 +78,7 @@ class MachineSizeOptsTest : public testing::Test { M->setTargetTriple(TM->getTargetTriple().getTriple()); M->setDataLayout(TM->createDataLayout()); MMI = std::make_unique(TM.get()); - if (Parser->parseMachineFunctions(*M, *MMI.get())) + if (Parser->parseMachineFunctions(*M, *MMI)) report_fatal_error("parseMachineFunctions failed"); } @@ -98,7 +98,7 @@ TEST_F(MachineSizeOptsTest, Test) { ASSERT_TRUE(G != nullptr); MachineFunction *H = getMachineFunction(M.get(), "h"); ASSERT_TRUE(H != nullptr); - ProfileSummaryInfo PSI = ProfileSummaryInfo(*M.get()); + ProfileSummaryInfo PSI = ProfileSummaryInfo(*M); ASSERT_TRUE(PSI.hasProfileSummary()); BFIData 
BFID_F(*F); BFIData BFID_G(*G); diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp index d1c3dcb2f8ee4f..8231cfd2ad1c94 100644 --- a/llvm/unittests/TargetParser/TargetParserTest.cpp +++ b/llvm/unittests/TargetParser/TargetParserTest.cpp @@ -2255,23 +2255,6 @@ TEST(TargetParserTest, AArch64PrintSupportedExtensions) { EXPECT_EQ(std::string::npos, captured.find("ssbs2")); } -TEST(TargetParserTest, AArch64PrintEnabledExtensions) { - // Pick a single enabled extension to validate formatting - std::set EnabledExtensions = {"crc"}; - std::string ExpectedOutput = - "Extensions enabled for the given AArch64 target\n\n" - " Architecture Feature(s) Description\n" - " FEAT_CRC32 Enable ARMv8 CRC-32 checksum instructions\n"; - - outs().flush(); - testing::internal::CaptureStdout(); - AArch64::printEnabledExtensions(EnabledExtensions); - outs().flush(); - std::string CapturedOutput = testing::internal::GetCapturedStdout(); - - EXPECT_EQ(CapturedOutput, ExpectedOutput); -} - struct AArch64ExtensionDependenciesBaseArchTestParams { const llvm::AArch64::ArchInfo &Arch; std::vector Modifiers; diff --git a/llvm/unittests/Transforms/Utils/SizeOptsTest.cpp b/llvm/unittests/Transforms/Utils/SizeOptsTest.cpp index 7caa5ed319acba..9c3d1657aa03a0 100644 --- a/llvm/unittests/Transforms/Utils/SizeOptsTest.cpp +++ b/llvm/unittests/Transforms/Utils/SizeOptsTest.cpp @@ -55,7 +55,7 @@ TEST_F(SizeOptsTest, Test) { Function *G = M->getFunction("g"); Function *H = M->getFunction("h"); - ProfileSummaryInfo PSI(*M.get()); + ProfileSummaryInfo PSI(*M); BFIData BFID_F(*F); BFIData BFID_G(*G); BFIData BFID_H(*H); diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/boost/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/boost/BUILD.gn index c7f9e0466b04ea..beaa7bab0f6e50 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/boost/BUILD.gn +++ 
b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/boost/BUILD.gn @@ -12,6 +12,7 @@ static_library("boost") { ] sources = [ "BoostTidyModule.cpp", + "UseRangesCheck.cpp", "UseToStringCheck.cpp", ] } diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn index 9b5e157385dd2f..0b72d01f2279bd 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn @@ -49,6 +49,7 @@ static_library("modernize") { "UseNoexceptCheck.cpp", "UseNullptrCheck.cpp", "UseOverrideCheck.cpp", + "UseRangesCheck.cpp", "UseStartsEndsWithCheck.cpp", "UseStdFormatCheck.cpp", "UseStdNumbersCheck.cpp", diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/utils/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/utils/BUILD.gn index adcebcab7ef7d1..379424d0f4780e 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/utils/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/utils/BUILD.gn @@ -32,6 +32,7 @@ static_library("utils") { "RenamerClangTidyCheck.cpp", "TransformerClangTidyCheck.cpp", "TypeTraits.cpp", + "UseRangesCheck.cpp", "UsingInserter.cpp", ] } diff --git a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn index 689bc2e137d244..a94674a61873d9 100644 --- a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn @@ -317,16 +317,16 @@ if (libcxx_enable_experimental) { sources = [ "experimental/keep.cpp" ] if (libcxx_enable_filesystem && libcxx_enable_time_zone_database) { sources += [ - "include/tzdb/time_zone_private.h", - "include/tzdb/types_private.h", - "include/tzdb/tzdb_list_private.h", - "include/tzdb/tzdb_private.h", + "experimental/include/tzdb/time_zone_private.h", + "experimental/include/tzdb/types_private.h", + 
"experimental/include/tzdb/tzdb_list_private.h", + "experimental/include/tzdb/tzdb_private.h", # TODO TZDB The exception could be moved in chrono once the TZDB library # is no longer experimental. - "chrono_exception.cpp", - "time_zone.cpp", - "tzdb.cpp", - "tzdb_list.cpp", + "experimental/chrono_exception.cpp", + "experimental/time_zone.cpp", + "experimental/tzdb.cpp", + "experimental/tzdb_list.cpp", ] } deps = [ "//libcxx/include" ] diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGenData/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGenData/BUILD.gn new file mode 100644 index 00000000000000..7fc69766368ecc --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGenData/BUILD.gn @@ -0,0 +1,11 @@ +static_library("CodeGenData") { + output_name = "LLVMCodeGenData" + deps = [ + "//llvm/lib/IR", + "//llvm/lib/Support", + ] + sources = [ + "OutlinedHashTree.cpp", + "OutlinedHashTreeRecord.cpp", + ] +} diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn index 8264f6d73e791e..aae0a2aa00d8d0 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn @@ -139,6 +139,7 @@ static_library("LLVMX86CodeGen") { "X86TileConfig.cpp", "X86VZeroUpper.cpp", "X86WinEHState.cpp", + "X86WinFixupBufferSecurityCheck.cpp", ] } diff --git a/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn index 92c6329f350785..6bb0526e1d7e70 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn @@ -10,6 +10,7 @@ group("unittests") { "Bitstream:BitstreamTests", "CodeGen:CodeGenTests", "CodeGen/GlobalISel:GlobalISelTests", + "CodeGenData:CodeGenDataTests", "DWARFLinkerParallel:DWARFLinkerParallelTests", "DebugInfo/BTF:DebugInfoBTFTests", "DebugInfo/CodeView:DebugInfoCodeViewTests", diff --git a/llvm/utils/gn/secondary/llvm/unittests/CodeGenData/BUILD.gn 
b/llvm/utils/gn/secondary/llvm/unittests/CodeGenData/BUILD.gn new file mode 100644 index 00000000000000..f6405932388465 --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/unittests/CodeGenData/BUILD.gn @@ -0,0 +1,18 @@ +import("//third-party/unittest/unittest.gni") + +unittest("CodeGenDataTests") { + deps = [ + "//llvm/lib/CodeGen", + "//llvm/lib/CodeGenData", + "//llvm/lib/IR", + "//llvm/lib/MC", + "//llvm/lib/Support", + "//llvm/lib/Target", + "//llvm/lib/Target:TargetsToBuild", + "//llvm/lib/Testing/Support", + ] + sources = [ + "OutlinedHashTreeRecordTest.cpp", + "OutlinedHashTreeTest.cpp", + ] +} diff --git a/llvm/utils/gn/secondary/llvm/unittests/MC/AMDGPU/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/MC/AMDGPU/BUILD.gn index 2b78dc842bbd6a..a7460501d216cf 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/MC/AMDGPU/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/MC/AMDGPU/BUILD.gn @@ -5,8 +5,10 @@ unittest("AMDGPUMCTests") { "//llvm/lib/CodeGen", "//llvm/lib/IR", "//llvm/lib/MC", + "//llvm/lib/MC/MCDisassembler", "//llvm/lib/Support", "//llvm/lib/Target/AMDGPU:LLVMAMDGPUCodeGen", + "//llvm/lib/Target/AMDGPU/Disassembler", "//llvm/lib/Target/AMDGPU/MCTargetDesc", "//llvm/lib/Target/AMDGPU/TargetInfo", "//llvm/lib/TargetParser", @@ -22,5 +24,8 @@ unittest("AMDGPUMCTests") { # AMDGPUMCTests heavily reaches into lib/Target/AMDGPU internals. 
include_dirs = [ "//llvm/lib/Target/AMDGPU" ] - sources = [ "DwarfRegMappings.cpp" ] + sources = [ + "Disassembler.cpp", + "DwarfRegMappings.cpp", + ] } diff --git a/llvm/utils/mlgo-utils/README.md b/llvm/utils/mlgo-utils/README.md index 12e9375f23edac..3ff4f9d7979f81 100644 --- a/llvm/utils/mlgo-utils/README.md +++ b/llvm/utils/mlgo-utils/README.md @@ -4,9 +4,3 @@ This folder contains MLGO Python utilities, particularly infrastructure to help enable ML applications within LLVM, especially tooling to extract corpora that can be used in downstream projects to train ML models and perform other tasks that benefit from having a large amount of data. - -### Python Versioning - -Due to type annotations, the MLGO tooling currently only supports a Python -version greater than 3.8, deviating from the current LLVM project-wide -minimum supported version of Python 3.6. diff --git a/llvm/utils/mlgo-utils/pyproject.toml b/llvm/utils/mlgo-utils/pyproject.toml index dac18a785c17b9..c3b4c78b6cd0b9 100644 --- a/llvm/utils/mlgo-utils/pyproject.toml +++ b/llvm/utils/mlgo-utils/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" name = "mlgo" description = "Tooling for ML in LLVM" readme = "README.md" -requires-python = ">=3.8,<3.11" +requires-python = ">=3.8" dynamic = ["version"] license = {text = "Apache-2.0 WITH LLVM-exception"} classifiers = [ diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index 2a58d02d7b704d..693fca4f63502b 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -1652,7 +1652,7 @@ void populateElementwiseOpsFusionPatterns( /// Function type which is used to control propagation of tensor.pack/unpack /// ops. -using ControlPropagationFn = std::function; +using ControlPropagationFn = std::function; /// Patterns to bubble up or down data layout ops across other operations. 
void populateDataLayoutPropagationPatterns( diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 1a1ca5e71b3e2e..ab1fb649fcfde1 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -354,7 +354,7 @@ def LoopNestOp : OpenMP_Op<"loop_nest", traits = [ def WsloopOp : OpenMP_Op<"wsloop", traits = [ AttrSizedOperandSegments, DeclareOpInterfaceMethods, - RecursiveMemoryEffects, SingleBlockImplicitTerminator<"TerminatorOp"> + RecursiveMemoryEffects, SingleBlock ], clauses = [ // TODO: Complete clause list (allocate, private). // TODO: Sort clauses alphabetically. @@ -418,7 +418,7 @@ def WsloopOp : OpenMP_Op<"wsloop", traits = [ def SimdOp : OpenMP_Op<"simd", traits = [ AttrSizedOperandSegments, DeclareOpInterfaceMethods, - RecursiveMemoryEffects, SingleBlockImplicitTerminator<"TerminatorOp"> + RecursiveMemoryEffects, SingleBlock ], clauses = [ // TODO: Complete clause list (linear, private, reduction). OpenMP_AlignedClause, OpenMP_IfClause, OpenMP_NontemporalClause, @@ -485,7 +485,7 @@ def YieldOp : OpenMP_Op<"yield", //===----------------------------------------------------------------------===// def DistributeOp : OpenMP_Op<"distribute", traits = [ AttrSizedOperandSegments, DeclareOpInterfaceMethods, - RecursiveMemoryEffects, SingleBlockImplicitTerminator<"TerminatorOp"> + RecursiveMemoryEffects, SingleBlock ], clauses = [ // TODO: Complete clause list (private). // TODO: Sort clauses alphabetically. @@ -575,7 +575,7 @@ def TaskOp : OpenMP_Op<"task", traits = [ def TaskloopOp : OpenMP_Op<"taskloop", traits = [ AttrSizedOperandSegments, AutomaticAllocationScope, DeclareOpInterfaceMethods, RecursiveMemoryEffects, - SingleBlockImplicitTerminator<"TerminatorOp"> + SingleBlock ], clauses = [ // TODO: Complete clause list (private). // TODO: Sort clauses alphabetically. 
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td index 31a306072d0ec3..385aa8b1b016a6 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td @@ -84,8 +84,8 @@ def LoopWrapperInterface : OpInterface<"LoopWrapperInterface"> { /*description=*/[{ Tell whether the operation could be taking the role of a loop wrapper. That is, it has a single region with a single block in which there are - two operations: another wrapper or `omp.loop_nest` operation and a - terminator. + two operations: another wrapper (also taking a loop wrapper role) or + `omp.loop_nest` operation and a terminator. }], /*retTy=*/"bool", /*methodName=*/"isWrapper", @@ -102,8 +102,14 @@ def LoopWrapperInterface : OpInterface<"LoopWrapperInterface"> { Operation &firstOp = *r.op_begin(); Operation &secondOp = *(std::next(r.op_begin())); - return ::llvm::isa(firstOp) && - secondOp.hasTrait(); + + if (!secondOp.hasTrait()) + return false; + + if (auto wrapper = ::llvm::dyn_cast(firstOp)) + return wrapper.isWrapper(); + + return ::llvm::isa(firstOp); }] >, InterfaceMethod< diff --git a/mlir/include/mlir/IR/DialectImplementation.h b/mlir/include/mlir/IR/DialectImplementation.h index 1e4f7f787a1eef..303564bf66470d 100644 --- a/mlir/include/mlir/IR/DialectImplementation.h +++ b/mlir/include/mlir/IR/DialectImplementation.h @@ -15,6 +15,22 @@ #define MLIR_IR_DIALECTIMPLEMENTATION_H #include "mlir/IR/OpImplementation.h" +#include + +namespace { + +// reference https://stackoverflow.com/a/16000226 +template +struct HasStaticDialectName : std::false_type {}; + +template +struct HasStaticDialectName< + T, typename std::enable_if< + std::is_same<::llvm::StringLiteral, + std::decay_t>::value, + void>::type> : std::true_type {}; + +} // namespace namespace mlir { @@ -63,6 +79,9 @@ struct FieldParser< AttributeT, std::enable_if_t::value, AttributeT>> { static FailureOr 
parse(AsmParser &parser) { + if constexpr (HasStaticDialectName::value) { + parser.getContext()->getOrLoadDialect(AttributeT::dialectName); + } AttributeT value; if (parser.parseCustomAttributeWithFallback(value)) return failure(); @@ -112,6 +131,9 @@ struct FieldParser< std::enable_if_t::value, std::optional>> { static FailureOr> parse(AsmParser &parser) { + if constexpr (HasStaticDialectName::value) { + parser.getContext()->getOrLoadDialect(AttributeT::dialectName); + } AttributeT attr; OptionalParseResult result = parser.parseOptionalAttribute(attr); if (result.has_value()) { diff --git a/mlir/lib/Analysis/Presburger/Utils.cpp b/mlir/lib/Analysis/Presburger/Utils.cpp index 9b32972de2e0a2..383888c3b5660e 100644 --- a/mlir/lib/Analysis/Presburger/Utils.cpp +++ b/mlir/lib/Analysis/Presburger/Utils.cpp @@ -362,6 +362,8 @@ void presburger::normalizeDiv(MutableArrayRef num, DynamicAPInt &denom) { assert(denom > 0 && "denom must be positive!"); DynamicAPInt gcd = llvm::gcd(gcdRange(num), denom); + if (gcd == 1) + return; for (DynamicAPInt &coeff : num) coeff /= gcd; denom /= gcd; diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index 57d126603ebd72..0754bd95a90f73 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -1356,8 +1356,12 @@ ParseResult MapOp::parse(OpAsmParser &parser, OperationState &result) { return failure(); if (payloadOpName.has_value()) { - addBodyWithPayloadOp(parser, result, payloadOpName.value(), payloadOpAttrs, - ArrayRef(result.operands).drop_back()); + if (!result.operands.empty()) + addBodyWithPayloadOp(parser, result, payloadOpName.value(), + payloadOpAttrs, + ArrayRef(result.operands).drop_back()); + else + result.addRegion(); } else { SmallVector regionArgs; if (parser.parseArgumentList(regionArgs, OpAsmParser::Delimiter::Paren, @@ -1739,7 +1743,8 @@ static void buildIdentityRegion(OpBuilder &builder, Location loc, ValueRange outputs) { 
buildGenericRegion(builder, loc, region, inputs, outputs, [](OpBuilder &b, Location loc, ValueRange args) { - b.create(loc, args[0]); + if (!args.empty()) + b.create(loc, args[0]); }); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp index 6984bc2dff4980..0d7ab7232e1e6d 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp @@ -378,7 +378,7 @@ bubbleUpPackOpThroughGenericOp(RewriterBase &rewriter, tensor::PackOp packOp, return failure(); // User controlled propagation function. - if (!controlFn(genericOp)) + if (!controlFn(&packOp.getSourceMutable())) return failure(); // TODO: Enable propagation in the presence of linalg.index and @@ -488,7 +488,7 @@ class BubbleUpPackThroughPadOp final : public OpRewritePattern { return failure(); // User controlled propagation function. - if (!controlFn(padOp)) + if (!controlFn(&packOp.getSourceMutable())) return failure(); if (!padOp.getResult().hasOneUse()) @@ -844,7 +844,7 @@ class BubbleUpPackOpThroughReshapeOp final } // User controlled propagation function. - if (!controlFn(srcOp)) + if (!controlFn(&packOp.getSourceMutable())) return failure(); return TypeSwitch(srcOp) @@ -880,10 +880,13 @@ class BubbleUpPackOpThroughReshapeOp final /// %unpack = tensor.unpack %expanded outer_dims_perm = [0, 1, 2] /// inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %empty /// : tensor -> tensor -static LogicalResult -pushDownUnPackOpThroughExpandShape(tensor::UnPackOp unPackOp, - tensor::ExpandShapeOp expandOp, - PatternRewriter &rewriter) { +static LogicalResult pushDownUnPackOpThroughExpandShape( + tensor::UnPackOp unPackOp, tensor::ExpandShapeOp expandOp, + PatternRewriter &rewriter, ControlPropagationFn controlFn) { + // User controlled propagation function. 
+ if (!controlFn(&expandOp.getSrcMutable())) + return failure(); + SmallVector innerTileSizes = unPackOp.getStaticTiles(); ArrayRef innerDimsPos = unPackOp.getInnerDimsPos(); ArrayRef outerDimsPerm = unPackOp.getOuterDimsPerm(); @@ -970,13 +973,10 @@ class PushDownUnPackOpThroughReshapeOp final } Operation *consumerOp = *result.user_begin(); - // User controlled propagation function. - if (!controlFn(consumerOp)) - return failure(); - return TypeSwitch(consumerOp) .Case([&](tensor::ExpandShapeOp op) { - return pushDownUnPackOpThroughExpandShape(unPackOp, op, rewriter); + return pushDownUnPackOpThroughExpandShape(unPackOp, op, rewriter, + controlFn); }) .Default([](Operation *) { return failure(); }); } @@ -1038,7 +1038,8 @@ static FailureOr getUnPackedOperand(GenericOp genericOp) { /// inner_dims_pos = [3] inner_tiles = [32] into %0 /// static FailureOr> -pushDownUnPackOpThroughGenericOp(RewriterBase &rewriter, GenericOp genericOp) { +pushDownUnPackOpThroughGenericOp(RewriterBase &rewriter, GenericOp genericOp, + ControlPropagationFn controlFn) { if (genericOp.getNumResults() != 1) return failure(); @@ -1055,6 +1056,10 @@ pushDownUnPackOpThroughGenericOp(RewriterBase &rewriter, GenericOp genericOp) { tensor::UnPackOp producerUnPackOp = unPackedOperand->get().getDefiningOp(); assert(producerUnPackOp && "expect a valid UnPackOp"); + + if (!controlFn(unPackedOperand)) + return failure(); + auto packInfo = getPackingInfoFromOperand(unPackedOperand, genericOp, producerUnPackOp); if (failed(packInfo)) @@ -1122,10 +1127,8 @@ struct PushDownUnPackOpThroughGenericOp : public OpRewritePattern { LogicalResult matchAndRewrite(GenericOp genericOp, PatternRewriter &rewriter) const override { - if (!controlFn(genericOp)) - return failure(); - - auto genericAndRepl = pushDownUnPackOpThroughGenericOp(rewriter, genericOp); + auto genericAndRepl = + pushDownUnPackOpThroughGenericOp(rewriter, genericOp, controlFn); if (failed(genericAndRepl)) return failure(); 
rewriter.replaceOp(genericOp, std::get<1>(*genericAndRepl)); @@ -1150,7 +1153,7 @@ struct PushDownUnPackThroughPadOp : public OpRewritePattern { if (!unpackOp) return failure(); - if (!controlFn(padOp)) + if (!controlFn(&padOp.getSourceMutable())) return failure(); Location loc = padOp.getLoc(); diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp index 2b81d6cdc1eabe..7a2400b41b0456 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp @@ -381,18 +381,7 @@ static bool vectorizeExpr(PatternRewriter &rewriter, scf::ForOp forOp, VL vl, if (codegen) { VectorType vtp = vectorType(vl, arg.getType()); Value veci = rewriter.create(loc, vtp, arg); - Value incr; - if (vl.enableVLAVectorization) { - Type stepvty = vectorType(vl, rewriter.getI64Type()); - Value stepv = rewriter.create(loc, stepvty); - incr = rewriter.create(loc, vtp, stepv); - } else { - SmallVector integers; - for (unsigned i = 0, l = vl.vectorLength; i < l; i++) - integers.push_back(APInt(/*width=*/64, i)); - auto values = DenseElementsAttr::get(vtp, integers); - incr = rewriter.create(loc, vtp, values); - } + Value incr = rewriter.create(loc, vtp); vexp = rewriter.create(loc, veci, incr); } return true; diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir index 3aeb9e70522d52..4c9e09970279a1 100644 --- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir +++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir @@ -174,6 +174,7 @@ func.func @loop_nest_block_arg(%val : i32, %ub : i32, %i : index) { ^bb3: omp.yield } + omp.terminator } return } diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir index 44c81c31ace0f9..213ef6c7b2616d 100644 --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ 
b/mlir/test/Dialect/Linalg/invalid.mlir @@ -455,6 +455,32 @@ func.func @map_input_output_shape_mismatch( // ----- +func.func @map_no_operands1() { + // expected-error @+1 {{'linalg.map' op expected 1 or more operands, but found 0}} + linalg.map { arith.addf } +} + +// ----- + +func.func @map_no_operands2() { + // expected-error @+1 {{'linalg.map' op expected 1 or more operands, but found 0}} + "linalg.map"() ({ + ^bb0: + }) : () -> () +} + +// ----- + +func.func @map_no_operands3( + %lhs: tensor<64xf32>, %rhs: tensor<64xf32>, %init: tensor<64xf32>) + -> tensor<64xf32> { + // expected-error @+1 {{cannot name an operation with no results}} + %add = linalg.map { arith.addf } + func.return %add : tensor<64xf32> +} + +// ----- + func.func @reduce_input_vs_init_dimension_mismatch( %input: tensor<16x32x64xf32>, %init: tensor<16x64xf32>) -> tensor<16x64xf32> { @@ -676,6 +702,30 @@ func.func @transpose_input_init_rank_mismatch(%input: tensor<16x32xf32>, // ----- +func.func @transpose_no_operands1() { + // expected-error @+1 {{'linalg.transpose' op expected 2 operands, but found 0}} + linalg.transpose permutation = [1, 0, 2] +} + +// ----- + +func.func @transpose_no_operands2() { + // expected-error @+1 {{'linalg.transpose' op expected 2 operands, but found 0}} + "linalg.transpose"() <{permutation = array}> ({ + ^bb0: + }) : () -> () +} + +// ----- + +func.func @transpose_no_operands3() -> tensor<32x64x16xf32> { + // expected-error @+1 {{cannot name an operation with no results}} + %transpose = linalg.transpose permutation = [1, 0, 2] + func.return %transpose : tensor<32x64x16xf32> +} + +// ----- + func.func @broadcast_input_dims_rank_mismatch( %input: tensor<4x16xf32>, %init: tensor<4x8x16xf32>) -> tensor<4x8x16xf32> { @@ -728,6 +778,31 @@ func.func @broadcast_size_1_extension_not_supported( // ----- +func.func @broadcast_no_operands1() { + // expected-error @+1 {{'linalg.broadcast' op expected 2 operands, but found 0}} + linalg.broadcast dimensions = [1] +} + +// ----- + 
+func.func @broadcast_no_operands2() { + // expected-error @+1 {{'linalg.broadcast' op expected 2 operands, but found 0}} + "linalg.broadcast"() <{dimensions = array}> ({ + ^bb0: + }) : () -> () +} + +// ----- + +func.func @broadcast_no_operands3() + -> tensor<4x?x16xf32> { + // expected-error @+1 {{cannot name an operation with no results}} + %broadcast = linalg.broadcast dimensions = [1] + func.return %broadcast : tensor<32x64x16xf32> +} + +// ----- + func.func @missing_iterator_types() { // expected-error @below {{expected "iterator_types" array attribute}} linalg.generic {} ins() outs() diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 6a04b9ead746c6..9977dd57e3023b 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -11,8 +11,8 @@ func.func @unknown_clause() { // ----- func.func @not_wrapper() { + // expected-error@+1 {{op must be a loop wrapper}} omp.distribute { - // expected-error@+1 {{op must take a loop wrapper role if nested inside of 'omp.distribute'}} omp.parallel { %0 = arith.constant 0 : i32 omp.terminator @@ -383,12 +383,16 @@ func.func @omp_simd() -> () { // ----- -func.func @omp_simd_nested_wrapper() -> () { +func.func @omp_simd_nested_wrapper(%lb : index, %ub : index, %step : index) -> () { // expected-error @below {{op must wrap an 'omp.loop_nest' directly}} omp.simd { omp.distribute { + omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { + omp.yield + } omp.terminator } + omp.terminator } return } @@ -1960,6 +1964,7 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { } omp.terminator } + omp.terminator } return } @@ -2158,11 +2163,13 @@ func.func @omp_distribute_wrapper() -> () { // ----- -func.func @omp_distribute_nested_wrapper(%data_var : memref) -> () { +func.func @omp_distribute_nested_wrapper(%lb: index, %ub: index, %step: index) -> () { // expected-error @below {{only supported nested wrappers are 'omp.parallel' and 'omp.simd'}} 
omp.distribute { "omp.wsloop"() ({ - %0 = arith.constant 0 : i32 + omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { + "omp.yield"() : () -> () + } "omp.terminator"() : () -> () }) : () -> () "omp.terminator"() : () -> () diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index d6b655dd20ef81..d6f4a810c4a80b 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -631,6 +631,7 @@ func.func @omp_simd_pretty(%lb : index, %ub : index, %step : index) -> () { omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { omp.yield } + omp.terminator } return } @@ -646,6 +647,7 @@ func.func @omp_simd_pretty_aligned(%lb : index, %ub : index, %step : index, omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { omp.yield } + omp.terminator } return } @@ -657,6 +659,7 @@ func.func @omp_simd_pretty_if(%lb : index, %ub : index, %step : index, %if_cond omp.loop_nest (%iv): index = (%lb) to (%ub) step (%step) { omp.yield } + omp.terminator } return } @@ -670,6 +673,7 @@ func.func @omp_simd_pretty_nontemporal(%lb : index, %ub : index, %step : index, omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { omp.yield } + omp.terminator } return } @@ -681,18 +685,21 @@ func.func @omp_simd_pretty_order(%lb : index, %ub : index, %step : index) -> () omp.loop_nest (%iv): index = (%lb) to (%ub) step (%step) { omp.yield } + omp.terminator } // CHECK: omp.simd order(reproducible:concurrent) omp.simd order(reproducible:concurrent) { omp.loop_nest (%iv): index = (%lb) to (%ub) step (%step) { omp.yield } + omp.terminator } // CHECK: omp.simd order(unconstrained:concurrent) omp.simd order(unconstrained:concurrent) { omp.loop_nest (%iv): index = (%lb) to (%ub) step (%step) { omp.yield } + omp.terminator } return } @@ -704,6 +711,7 @@ func.func @omp_simd_pretty_simdlen(%lb : index, %ub : index, %step : index) -> ( omp.loop_nest (%iv): index = (%lb) to (%ub) step (%step) { omp.yield } + omp.terminator } return } @@ 
-715,6 +723,7 @@ func.func @omp_simd_pretty_safelen(%lb : index, %ub : index, %step : index) -> ( omp.loop_nest (%iv): index = (%lb) to (%ub) step (%step) { omp.yield } + omp.terminator } return } @@ -734,42 +743,49 @@ func.func @omp_distribute(%chunk_size : i32, %data_var : memref, %arg0 : i3 omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { omp.yield } + omp.terminator } // CHECK: omp.distribute dist_schedule_static omp.distribute dist_schedule_static { omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { omp.yield } + omp.terminator } // CHECK: omp.distribute dist_schedule_static chunk_size(%{{.+}} : i32) omp.distribute dist_schedule_static chunk_size(%chunk_size : i32) { omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { omp.yield } + omp.terminator } // CHECK: omp.distribute order(concurrent) omp.distribute order(concurrent) { omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { omp.yield } + omp.terminator } // CHECK: omp.distribute order(reproducible:concurrent) omp.distribute order(reproducible:concurrent) { omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { omp.yield } + omp.terminator } // CHECK: omp.distribute order(unconstrained:concurrent) omp.distribute order(unconstrained:concurrent) { omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { omp.yield } + omp.terminator } // CHECK: omp.distribute allocate(%{{.+}} : memref -> %{{.+}} : memref) omp.distribute allocate(%data_var : memref -> %data_var : memref) { omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { omp.yield } + omp.terminator } // CHECK: omp.distribute omp.distribute { @@ -777,7 +793,9 @@ func.func @omp_distribute(%chunk_size : i32, %data_var : memref, %arg0 : i3 omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { omp.yield } + omp.terminator } + omp.terminator } return } @@ -2292,6 +2310,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () { // CHECK: omp.yield omp.yield } + omp.terminator } 
%testbool = "test.bool"() : () -> (i1) @@ -2302,6 +2321,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () { // CHECK: omp.yield omp.yield } + omp.terminator } // CHECK: omp.taskloop final(%{{[^)]+}}) { @@ -2310,6 +2330,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () { // CHECK: omp.yield omp.yield } + omp.terminator } // CHECK: omp.taskloop untied { @@ -2318,6 +2339,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () { // CHECK: omp.yield omp.yield } + omp.terminator } // CHECK: omp.taskloop mergeable { @@ -2326,6 +2348,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () { // CHECK: omp.yield omp.yield } + omp.terminator } %testf32 = "test.f32"() : () -> (!llvm.ptr) @@ -2336,6 +2359,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () { // CHECK: omp.yield omp.yield } + omp.terminator } // Checking byref attribute for in_reduction @@ -2345,6 +2369,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () { // CHECK: omp.yield omp.yield } + omp.terminator } // CHECK: omp.taskloop reduction(byref @add_f32 -> %{{.+}} : !llvm.ptr, @add_f32 -> %{{.+}} : !llvm.ptr) { @@ -2353,6 +2378,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () { // CHECK: omp.yield omp.yield } + omp.terminator } // check byref attrbute for reduction @@ -2362,6 +2388,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () { // CHECK: omp.yield omp.yield } + omp.terminator } // CHECK: omp.taskloop in_reduction(@add_f32 -> %{{.+}} : !llvm.ptr) reduction(@add_f32 -> %{{.+}} : !llvm.ptr) { @@ -2370,6 +2397,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () { // CHECK: omp.yield omp.yield } + omp.terminator } %testi32 = "test.i32"() : () -> (i32) @@ -2379,6 +2407,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () { // CHECK: omp.yield omp.yield } + omp.terminator } %testmemref = "test.memref"() : () -> (memref) @@ -2388,6 +2417,7 @@ func.func 
@omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () { // CHECK: omp.yield omp.yield } + omp.terminator } %testi64 = "test.i64"() : () -> (i64) @@ -2397,6 +2427,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () { // CHECK: omp.yield omp.yield } + omp.terminator } // CHECK: omp.taskloop num_tasks(%{{[^:]+}}: i64) { @@ -2405,6 +2436,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () { // CHECK: omp.yield omp.yield } + omp.terminator } // CHECK: omp.taskloop nogroup { @@ -2413,6 +2445,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () { // CHECK: omp.yield omp.yield } + omp.terminator } // CHECK: omp.taskloop { @@ -2422,7 +2455,9 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () { // CHECK: omp.yield omp.yield } + omp.terminator } + omp.terminator } // CHECK: return diff --git a/mlir/test/IR/parser.mlir b/mlir/test/IR/parser.mlir index e0d7f2a5dd0cb9..cace1fefa43d62 100644 --- a/mlir/test/IR/parser.mlir +++ b/mlir/test/IR/parser.mlir @@ -1464,15 +1464,3 @@ test.dialect_custom_format_fallback custom_format_fallback // Check that an op with an optional result parses f80 as type. // CHECK: test.format_optional_result_d_op : f80 test.format_optional_result_d_op : f80 - - -// ----- - -// This is a testing that a non-qualified attribute in a custom format -// correctly preload the dialect before creating the attribute. 
-#attr = #test.nested_polynomial<<1 + x**2>> -// CHECK-lABLE: @parse_correctly -llvm.func @parse_correctly() { - test.containing_int_polynomial_attr #attr - llvm.return -} diff --git a/mlir/test/IR/parser_dialect_loading.mlir b/mlir/test/IR/parser_dialect_loading.mlir new file mode 100644 index 00000000000000..b9c2d30cf3c982 --- /dev/null +++ b/mlir/test/IR/parser_dialect_loading.mlir @@ -0,0 +1,19 @@ +// RUN: mlir-opt -allow-unregistered-dialect --split-input-file %s | FileCheck %s + +// This is a testing that a non-qualified attribute in a custom format +// correctly preload the dialect before creating the attribute. +#attr = #test.nested_polynomial> +// CHECK-LABEL: @parse_correctly +llvm.func @parse_correctly() { + test.containing_int_polynomial_attr #attr + llvm.return +} + +// ----- + +#attr2 = #test.nested_polynomial2> +// CHECK-LABEL: @parse_correctly_2 +llvm.func @parse_correctly_2() { + test.containing_int_polynomial_attr2 #attr2 + llvm.return +} diff --git a/mlir/test/Integration/Dialect/Arith/CPU/addition.mlir b/mlir/test/Integration/Dialect/Arith/CPU/addition.mlir new file mode 100644 index 00000000000000..b6acfd53c1f5d9 --- /dev/null +++ b/mlir/test/Integration/Dialect/Arith/CPU/addition.mlir @@ -0,0 +1,88 @@ +// RUN: mlir-opt %s --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \ +// RUN: --convert-func-to-llvm --convert-arith-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: --shared-libs=%mlir_c_runner_utils | \ +// RUN: FileCheck %s --match-full-lines + +func.func @addi_i1(%v1 : i1, %v2 : i1) { + vector.print str "@addi_i1\n" + %res = arith.addi %v1, %v2 : i1 + vector.print %res : i1 + return +} + +func.func @addi() { + // ------------------------------------------------ + // Test i1 + // ------------------------------------------------ + + // addi on i1 + // addi(0, 1) : i1 = 1 : i1; addi(0, -1) : i1 = 1 + %false = arith.constant 0 : i1 + %true = arith.constant 1 : i1 + + // CHECK-LABEL: @addi_i1 + 
// CHECK-NEXT: 1 + func.call @addi_i1(%false, %true) : (i1, i1) -> () + + // CHECK-LABEL: @addi_i1 + // CHECK-NEXT: 1 + %true_based_on_non_zero_val = arith.constant -1 : i1 + func.call @addi_i1(%false, %true_based_on_non_zero_val) : (i1, i1) -> () + + // ------------------------------------------------ + // TODO: Test i8, i16 etc.. + // ------------------------------------------------ + + return +} + +func.func @addui_extended_i1(%v1 : i1, %v2 : i1) { + vector.print str "@addui_extended_i1\n" + %res, %overflow = arith.addui_extended %v1, %v2 : i1, i1 + vector.print %res : i1 + vector.print %overflow : i1 + return +} + +func.func @addi_extended() { + // ------------------------------------------------ + // Test i1 + // ------------------------------------------------ + + // addui_extended on i1 + // addui_extended 1 1 : i1 = 0, 1 + %true = arith.constant 1 : i1 + %false = arith.constant 0 : i1 + + // CHECK-LABEL: @addui_extended_i1 + // CHECK-NEXT: 0 + // CHECK-NEXT: 1 + func.call @addui_extended_i1(%true, %true) : (i1, i1) -> () + + // CHECK-LABEL: @addui_extended_i1 + // CHECK-NEXT: 1 + // CHECK-NEXT: 0 + func.call @addui_extended_i1(%true, %false) : (i1, i1) -> () + + // CHECK-LABEL: @addui_extended_i1 + // CHECK-NEXT: 1 + // CHECK-NEXT: 0 + func.call @addui_extended_i1(%false, %true) : (i1, i1) -> () + + // CHECK-LABEL: @addui_extended_i1 + // CHECK-NEXT: 0 + // CHECK-NEXT: 0 + func.call @addui_extended_i1(%false, %false) : (i1, i1) -> () + + // ------------------------------------------------ + // TODO: Test i8, i16 etc.. 
+ // ------------------------------------------------ + return +} + +func.func @entry() { + func.call @addi() : () -> () + func.call @addi_extended() : () -> () + return +} diff --git a/mlir/test/Integration/Dialect/Arith/CPU/multiplication.mlir b/mlir/test/Integration/Dialect/Arith/CPU/multiplication.mlir new file mode 100644 index 00000000000000..21fd816788431e --- /dev/null +++ b/mlir/test/Integration/Dialect/Arith/CPU/multiplication.mlir @@ -0,0 +1,119 @@ +// RUN: mlir-opt %s --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \ +// RUN: --convert-func-to-llvm --convert-arith-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: --shared-libs=%mlir_c_runner_utils | \ +// RUN: FileCheck %s --match-full-lines + +func.func @mulsi_extended_i1(%v1 : i1, %v2 : i1) { + vector.print str "@mulsi_extended_i1\n" + %low, %high = arith.mulsi_extended %v1, %v2 : i1 + vector.print %low : i1 + vector.print %high : i1 + return +} + +func.func @mulsi_extended_i8(%v1 : i8, %v2 : i8) { + vector.print str "@mulsi_extended_i8\n" + %low, %high = arith.mulsi_extended %v1, %v2 : i8 + vector.print %low : i8 + vector.print %high : i8 + return +} + +func.func @mulsi_extended() { + // ------------------------------------------------ + // Test i1 + // ------------------------------------------------ + + // mulsi_extended on i1, tests for overflow bit + // mulsi_extended 1, 1 : i1 = (1, 0) + %true = arith.constant true + %false = arith.constant false + + // CHECK-LABEL: @mulsi_extended_i1 + // CHECK-NEXT: 1 + // CHECK-NEXT: 0 + func.call @mulsi_extended_i1(%true, %true) : (i1, i1) -> () + + // CHECK-LABEL: @mulsi_extended_i1 + // CHECK-NEXT: 0 + // CHECK-NEXT: 0 + func.call @mulsi_extended_i1(%true, %false) : (i1, i1) -> () + + // CHECK-LABEL: @mulsi_extended_i1 + // CHECK-NEXT: 0 + // CHECK-NEXT: 0 + func.call @mulsi_extended_i1(%false, %true) : (i1, i1) -> () + + // CHECK-LABEL: @mulsi_extended_i1 + // CHECK-NEXT: 0 + // CHECK-NEXT: 0 + func.call 
@mulsi_extended_i1(%false, %false) : (i1, i1) -> () + + // ------------------------------------------------ + // Test i8 + // ------------------------------------------------ + // mulsi extended versions, with overflow + %c_100_i8 = arith.constant -100 : i8 + + // mulsi_extended -100, -100 : i8 = (16, 39) + // CHECK-LABEL: @mulsi_extended_i8 + // CHECK-NEXT: 16 + // CHECK-NEXT: 39 + func.call @mulsi_extended_i8(%c_100_i8, %c_100_i8) : (i8, i8) -> () + + // ------------------------------------------------ + // TODO: Test i16, i32 etc.. + // ------------------------------------------------ + return +} + +func.func @mului_extended_i8(%v1 : i8, %v2 : i8) { + vector.print str "@mului_extended_i8\n" + %low, %high = arith.mului_extended %v1, %v2 : i8 + vector.print %low : i8 + vector.print %high : i8 + return +} + +func.func @mului_extended() { + // ------------------------------------------------ + // Test i8 + // ------------------------------------------------ + %c_n100_i8 = arith.constant -100 : i8 + %c_156_i8 = arith.constant 156 : i8 + + // mului_extended -100, -100 : i8 = (16, 95) + // and on equivalent representations (e.g. 156 === -100 (mod 256)) + + // CHECK-LABEL: @mului_extended_i8 + // CHECK-NEXT: 16 + // CHECK-NEXT: 95 + func.call @mului_extended_i8(%c_n100_i8, %c_n100_i8) : (i8, i8) -> () + + // CHECK-LABEL: @mului_extended_i8 + // CHECK-NEXT: 16 + // CHECK-NEXT: 95 + func.call @mului_extended_i8(%c_n100_i8, %c_156_i8) : (i8, i8) -> () + + // CHECK-LABEL: @mului_extended_i8 + // CHECK-NEXT: 16 + // CHECK-NEXT: 95 + func.call @mului_extended_i8(%c_156_i8, %c_n100_i8) : (i8, i8) -> () + + // CHECK-LABEL: @mului_extended_i8 + // CHECK-NEXT: 16 + // CHECK-NEXT: 95 + func.call @mului_extended_i8(%c_156_i8, %c_156_i8) : (i8, i8) -> () + + // ------------------------------------------------ + // TODO: Test i1, i16, i32 etc.. 
+ // ------------------------------------------------ + return +} + +func.func @entry() { + func.call @mulsi_extended() : () -> () + func.call @mului_extended() : () -> () + return +} diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 321de67aa48a18..dfeaf4be33adb8 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -726,6 +726,7 @@ llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64 llvm.store %3, %5 : f32, !llvm.ptr omp.yield } + omp.terminator } llvm.return } @@ -749,6 +750,7 @@ llvm.func @simd_simple_multiple_simdlen(%lb1 : i64, %ub1 : i64, %step1 : i64, %l llvm.store %3, %5 : f32, !llvm.ptr omp.yield } + omp.terminator } llvm.return } @@ -769,6 +771,7 @@ llvm.func @simd_simple_multiple_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %l llvm.store %3, %5 : f32, !llvm.ptr omp.yield } + omp.terminator } llvm.return } @@ -788,6 +791,7 @@ llvm.func @simd_simple_multiple_simdlen_safelen(%lb1 : i64, %ub1 : i64, %step1 : llvm.store %3, %5 : f32, !llvm.ptr omp.yield } + omp.terminator } llvm.return } @@ -816,6 +820,7 @@ llvm.func @simd_if(%arg0: !llvm.ptr {fir.bindc_name = "n"}, %arg1: !llvm.ptr {fi llvm.store %arg2, %1 : i32, !llvm.ptr omp.yield } + omp.terminator } llvm.return } @@ -836,6 +841,7 @@ llvm.func @simd_order() { llvm.store %arg0, %2 : i64, !llvm.ptr omp.yield } + omp.terminator } llvm.return } diff --git a/mlir/test/lib/Dialect/Linalg/TestDataLayoutPropagation.cpp b/mlir/test/lib/Dialect/Linalg/TestDataLayoutPropagation.cpp index b5998d9c851e45..4cf2460150d143 100644 --- a/mlir/test/lib/Dialect/Linalg/TestDataLayoutPropagation.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestDataLayoutPropagation.cpp @@ -33,7 +33,7 @@ struct TestDataLayoutPropagationPass MLIRContext *context = &getContext(); RewritePatternSet patterns(context); linalg::populateDataLayoutPropagationPatterns( - patterns, [](Operation *op) { return true; }); + 
patterns, [](OpOperand *opOperand) { return true; }); if (failed( applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)))) return signalPassFailure(); diff --git a/mlir/test/lib/Dialect/Test/TestAttrDefs.td b/mlir/test/lib/Dialect/Test/TestAttrDefs.td index 9e25acf5f5ba49..a0a1cd30ed8aef 100644 --- a/mlir/test/lib/Dialect/Test/TestAttrDefs.td +++ b/mlir/test/lib/Dialect/Test/TestAttrDefs.td @@ -356,8 +356,17 @@ def NestedPolynomialAttr : Test_Attr<"NestedPolynomialAttr"> { let mnemonic = "nested_polynomial"; let parameters = (ins Polynomial_IntPolynomialAttr:$poly); let assemblyFormat = [{ - `<` $poly `>` + `<` struct(params) `>` }]; } +def NestedPolynomialAttr2 : Test_Attr<"NestedPolynomialAttr2"> { + let mnemonic = "nested_polynomial2"; + let parameters = (ins OptionalParameter<"::mlir::polynomial::IntPolynomialAttr">:$poly); + let assemblyFormat = [{ + `<` struct(params) `>` + }]; +} + + #endif // TEST_ATTRDEFS diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index e1ec1428ee6d6a..9450764fcb1d5b 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -237,6 +237,11 @@ def ContainingIntPolynomialAttrOp : TEST_Op<"containing_int_polynomial_attr"> { let assemblyFormat = "$attr attr-dict"; } +def ContainingIntPolynomialAttr2Op : TEST_Op<"containing_int_polynomial_attr2"> { + let arguments = (ins NestedPolynomialAttr2:$attr); + let assemblyFormat = "$attr attr-dict"; +} + // A pattern that updates dense<[3.0, 4.0]> to dense<[5.0, 6.0]>. // This tests both matching and generating float elements attributes. 
def UpdateFloatElementsAttr : Pat< diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp index ea0d152cc94d44..8cc8314418104c 100644 --- a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp +++ b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp @@ -89,6 +89,8 @@ class DefGen { void emitTopLevelDeclarations(); /// Emit the function that returns the type or attribute name. void emitName(); + /// Emit the dialect name as a static member variable. + void emitDialectName(); /// Emit attribute or type builders. void emitBuilders(); /// Emit a verifier for the def. @@ -184,6 +186,8 @@ DefGen::DefGen(const AttrOrTypeDef &def) emitBuilders(); // Emit the type name. emitName(); + // Emit the dialect name. + emitDialectName(); // Emit the verifier. if (storageCls && def.genVerifyDecl()) emitVerifier(); @@ -281,6 +285,13 @@ void DefGen::emitName() { defCls.declare(std::move(nameDecl)); } +void DefGen::emitDialectName() { + std::string decl = + strfmt("static constexpr ::llvm::StringLiteral dialectName = \"{0}\";\n", + def.getDialect().getName()); + defCls.declare(std::move(decl)); +} + void DefGen::emitBuilders() { if (!def.skipDefaultBuilders()) { emitDefaultBuilder(); diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp index ffd5a3913cf185..dacc20b6ba2086 100644 --- a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp @@ -423,9 +423,11 @@ void DefFormat::genVariableParser(ParameterElement *el, FmtContext &ctx, Dialect dialect(dialectInit->getDef()); auto cppNamespace = dialect.getCppNamespace(); std::string name = dialect.getCppClassName(); - dialectLoading = ("\nodsParser.getContext()->getOrLoadDialect<" + - cppNamespace + "::" + name + ">();") - .str(); + if (name != "BuiltinDialect" || cppNamespace != "::mlir") { + dialectLoading = ("\nodsParser.getContext()->getOrLoadDialect<" + + cppNamespace + "::" + name + ">();") + .str(); + } 
} } } diff --git a/utils/bazel/llvm-project-overlay/clang-tools-extra/clang-tidy/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang-tools-extra/clang-tidy/BUILD.bazel index 317863de3b36ca..57c46c91402d9f 100644 --- a/utils/bazel/llvm-project-overlay/clang-tools-extra/clang-tidy/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang-tools-extra/clang-tidy/BUILD.bazel @@ -196,7 +196,10 @@ clang_tidy_library( clang_tidy_library( name = "boost", - deps = [":lib"], + deps = [ + ":lib", + ":utils", + ], ) clang_tidy_library( diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index b767b332b2b4ec..c9df15eb0c6b0a 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -1727,6 +1727,21 @@ libc_math_function( ], ) +libc_math_function( + name = "cospif", + additional_deps = [ + ":__support_fputil_fma", + ":__support_fputil_multiply_add", + ":__support_fputil_nearest_integer", + ":__support_fputil_polyeval", + ":__support_fputil_rounding_mode", + ":__support_macros_optimization", + ":common_constants", + ":explogxf", + ":sincosf_utils", + ], +) + libc_math_function( name = "tanhf", additional_deps = [ diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/math/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/math/BUILD.bazel index 9f8e217bbd08be..6dfa52e2d24d35 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/math/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/math/BUILD.bazel @@ -698,6 +698,15 @@ math_test( ], ) +math_test( + name = "cospif", + deps = [ + ":sdcomp26094", + "//libc:__support_cpp_array", + "//libc/utils/MPFRWrapper:mpfr_wrapper", + ], +) + math_test( name = "tanhf", deps = [ diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel index 2ad2209925cebf..b0a40ad51abecc 100644 --- 
a/utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel @@ -21,6 +21,10 @@ math_test( hdrs = ["CeilTest.h"], ) +math_test( + name = "cospif", +) + math_test( name = "floorf128", hdrs = ["FloorTest.h"], diff --git a/utils/bazel/llvm-project-overlay/libc/utils/MPFRWrapper/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/utils/MPFRWrapper/BUILD.bazel index 17ffd295059586..c708f008dec2c7 100644 --- a/utils/bazel/llvm-project-overlay/libc/utils/MPFRWrapper/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/utils/MPFRWrapper/BUILD.bazel @@ -53,5 +53,6 @@ libc_support_library( "//libc/test/UnitTest:LibcUnitTest", "//libc/test/UnitTest:fp_test_helpers", "//libc/utils/MPFRWrapper:mpfr_impl", + "@gmp//:gmp_", ], )